Goal/Purpose of operations:
PCA analysis of the new cancers (lung, pancreas, liver) to evaluate and determine any covariates. Pancreatic cancer had a less common subtype driving PC1. However, accounting for that subtype did not solve the issue: in most cancers the tumor and normal samples are split in PC1, PC2, or PC3, but not here. I checked 10 PCs and double-checked the sample labels. This is due to low tumor purity in pancreatic cancers.
Liver cancer shows a split between tumor and normal. Liver and lung seem to be influenced by the time between tissue removal and RNA prep (also seen in the RIN score). There is not a good/easy way to use this as a covariate because the samples between GTEx and TCGA will vary, but it is important to note here and in other places to highlight a limitation.
Finished pseudocode on:
220524
System which operations were done on:
my laptop
GitHub Repo:
Transfer_Learning_R03
Docker:
rstudio_cancer_dr
Directory of operations:
/home
Scripts being edited for operations:
NA
Data being used:
Recount3
Papers and tools:
DESeq2
prcomp
library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(SummarizedExperiment)
library(stringr)
PAAD
# Load the PAAD TCGA sample metadata and count table from the RDS files in the
# recount3 download directory.
paad_tcga_metadata <- readRDS("~/data/recount3/recount3_fix_download/paad_tcga_metadata.rds")
paad_tcga_counts <- readRDS("~/data/recount3/recount3_fix_download/paad_tcga_counts.rds")
check counts and metadata info
# Sanity check: the count table has 183 columns, matching the 183 metadata
# rows (presumably one per sample -- confirm against the recount3 export).
dim(paad_tcga_counts)
## [1] 63856 183
dim(paad_tcga_metadata)
## [1] 183 840
# Length of the first count column name (37); the ID extraction further down
# keeps only the last 36 characters, i.e. it drops one leading character.
nchar(colnames(paad_tcga_counts)[1])
## [1] 37
# Return the trailing `n` characters of every element of `x`.
#
# x: character vector.
# n: number of characters to keep, counted from the end of each string.
# Returns a character vector the same length as `x`; strings shorter than `n`
# come back whole.
# Adapted from
# https://stackoverflow.com/questions/7963898/extracting-the-last-n-characters-from-a-string-in-r
substrRight <- function(x, n) {
  total_chars <- nchar(x)
  first_kept <- total_chars - n + 1
  substr(x, start = first_kept, stop = total_chars)
}
# Build sample ids from the count column names: keep the last 36 characters
# and turn every punctuation character into "-" so they can be compared with
# the metadata `external_id` values.
ids <- str_replace_all(
  substrRight(colnames(paad_tcga_counts), 36),
  "[[:punct:]]",
  "-"
)
colnames(paad_tcga_counts) <- ids

# Sort the count columns and the metadata rows by their respective ids so
# that the two tables line up positionally.
paad_tcga_counts_order <- paad_tcga_counts[, order(ids)]
paad_tcga_metadata_order <- paad_tcga_metadata[
  order(paad_tcga_metadata$external_id),
]

# Prints TRUE when the sorted count columns and metadata rows carry exactly
# the same ids in the same order.
identical(
  colnames(paad_tcga_counts_order),
  paad_tcga_metadata_order$external_id
)
## [1] TRUE
# Guard: only relabel when the count columns and metadata rows are aligned.
# The relabel below is purely positional, so a mismatch would silently attach
# barcodes to the wrong samples and persist that to disk.
stopifnot(
  identical(
    colnames(paad_tcga_counts_order),
    paad_tcga_metadata_order$external_id
  )
)

# Replace the ids on the count columns with the TCGA barcodes taken from the
# (identically ordered) metadata rows.
colnames(paad_tcga_counts_order) <- paad_tcga_metadata_order$tcga_barcode

# Persist the ordered count table and metadata.
saveRDS(paad_tcga_counts_order, "/home/rstudio/data/paad_tcga_count_ordered.rds")
saveRDS(paad_tcga_metadata_order, "/home/rstudio/data/paad_tcga_metadata_ordered.rds")
import the pancreas data
# Load the GTEx pancreas count table and sample metadata from the RDS files in
# the recount3 download directory.
pan_gtex_counts <- readRDS("~/data/recount3/recount3_fix_download/pan_gtex_counts.rds")
pan_gtex_metadata <- readRDS("~/data/recount3/recount3_fix_download/pan_gtex_metadata.rds")
# Peek at both id sets: the count column names use "." as separator while the
# metadata ids use "-", and the two tables are not in the same order.
colnames(pan_gtex_counts)[1:5]
## [1] "GTEX.WFG7.0426.SM.4LMK5.1" "GTEX.1211K.1126.SM.5EGGB.1"
## [3] "GTEX.13FTX.1226.SM.5IFGN.1" "GTEX.145MN.1426.SM.5SI9H.1"
## [5] "GTEX.13O61.2126.SM.5IJEO.1"
pan_gtex_metadata$external_id[1:5]
## [1] "GTEX-111CU-0526-SM-5EGHK.1" "GTEX-111YS-1226-SM-5EGGJ.1"
## [3] "GTEX-1122O-0726-SM-5GIEV.1" "GTEX-1128S-0826-SM-5GZZI.1"
## [5] "GTEX-117YX-0226-SM-5EGH6.1"
# Normalise both id sets by turning every punctuation character into "-" so
# the count column names and the metadata ids become directly comparable.
ids<- str_replace_all(colnames(pan_gtex_counts), "[[:punct:]]", "-")
meta_data_ids<- str_replace_all(pan_gtex_metadata$external_id, "[[:punct:]]", "-")
# Prints TRUE when, once sorted, both tables contain exactly the same ids.
identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
## [1] TRUE
# Guard: the relabel below is positional, so refuse to continue unless the
# normalised count ids and metadata ids are the same set once sorted.
# Otherwise samples would silently receive the wrong ids and the mislabelled
# tables would be written to disk.
stopifnot(
  identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
)

# Sort count columns and metadata rows by their normalised ids so the two
# tables line up positionally.
pan_gtex_counts_order <- pan_gtex_counts[, order(ids)]
pan_gtex_metadata_order <- pan_gtex_metadata[order(meta_data_ids), ]

# Use the metadata's original `external_id` values as the count column names.
colnames(pan_gtex_counts_order) <- pan_gtex_metadata_order$external_id

# Keep a data.frame copy of the ordered counts and persist both tables.
recount3_count_pan <- as.data.frame(pan_gtex_counts_order)
saveRDS(recount3_count_pan, "/home/rstudio/data/pan_gtex_count_ordered.rds")
saveRDS(pan_gtex_metadata_order, "/home/rstudio/data/pan_gtex_metadata_ordered.rds")
# Variance-stabilise the GTEx pancreas counts with DESeq2 and run a PCA on the
# samples.
library(DESeq2)
#colData(recount3_rse_PANCREAS)
# vst() is given the counts as a plain matrix.
vst_table <- vst(as.matrix(recount3_count_pan))
# Transpose so that samples become rows, which is what prcomp() treats as
# observations. Despite the name, this is the transposed matrix, not a
# data.frame.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)
# Print standard deviation and proportion of variance for every component.
summary(pca.tumor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 30.0452 24.42454 22.20122 17.48497 15.98831 15.41468
## Proportion of Variance 0.1084 0.07166 0.05921 0.03673 0.03071 0.02854
## Cumulative Proportion 0.1084 0.18010 0.23931 0.27604 0.30675 0.33529
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 13.82431 13.2224 12.49573 11.92589 11.18827 10.42483
## Proportion of Variance 0.02296 0.0210 0.01876 0.01709 0.01504 0.01306
## Cumulative Proportion 0.35825 0.3792 0.39801 0.41509 0.43013 0.44319
## PC13 PC14 PC15 PC16 PC17 PC18 PC19
## Standard deviation 10.29831 9.53619 9.19273 8.93092 8.4630 8.39932 8.27100
## Proportion of Variance 0.01274 0.01092 0.01015 0.00958 0.0086 0.00847 0.00822
## Cumulative Proportion 0.45593 0.46685 0.47700 0.48658 0.4952 0.50366 0.51188
## PC20 PC21 PC22 PC23 PC24 PC25 PC26
## Standard deviation 8.17212 7.80879 7.46373 7.39137 7.25817 6.91705 6.87114
## Proportion of Variance 0.00802 0.00733 0.00669 0.00656 0.00633 0.00575 0.00567
## Cumulative Proportion 0.51990 0.52723 0.53392 0.54048 0.54681 0.55256 0.55823
## PC27 PC28 PC29 PC30 PC31 PC32 PC33
## Standard deviation 6.68168 6.60970 6.36138 6.30886 6.09662 5.90330 5.86881
## Proportion of Variance 0.00536 0.00525 0.00486 0.00478 0.00446 0.00419 0.00414
## Cumulative Proportion 0.56359 0.56884 0.57370 0.57848 0.58295 0.58713 0.59127
## PC34 PC35 PC36 PC37 PC38 PC39 PC40
## Standard deviation 5.74068 5.67335 5.64255 5.5463 5.48430 5.38641 5.31574
## Proportion of Variance 0.00396 0.00387 0.00382 0.0037 0.00361 0.00349 0.00339
## Cumulative Proportion 0.59523 0.59910 0.60292 0.6066 0.61023 0.61372 0.61711
## PC41 PC42 PC43 PC44 PC45 PC46 PC47
## Standard deviation 5.2437 5.21458 5.17741 5.14322 5.13541 5.04757 4.98279
## Proportion of Variance 0.0033 0.00327 0.00322 0.00318 0.00317 0.00306 0.00298
## Cumulative Proportion 0.6204 0.62368 0.62690 0.63008 0.63324 0.63631 0.63929
## PC48 PC49 PC50 PC51 PC52 PC53 PC54
## Standard deviation 4.95204 4.88953 4.8310 4.79131 4.76808 4.73406 4.65916
## Proportion of Variance 0.00295 0.00287 0.0028 0.00276 0.00273 0.00269 0.00261
## Cumulative Proportion 0.64223 0.64511 0.6479 0.65067 0.65340 0.65609 0.65870
## PC55 PC56 PC57 PC58 PC59 PC60 PC61
## Standard deviation 4.6532 4.61455 4.58852 4.55207 4.53557 4.50429 4.43300
## Proportion of Variance 0.0026 0.00256 0.00253 0.00249 0.00247 0.00244 0.00236
## Cumulative Proportion 0.6613 0.66386 0.66639 0.66888 0.67135 0.67378 0.67614
## PC62 PC63 PC64 PC65 PC66 PC67 PC68
## Standard deviation 4.39124 4.3779 4.34156 4.32945 4.29285 4.2801 4.21635
## Proportion of Variance 0.00232 0.0023 0.00226 0.00225 0.00221 0.0022 0.00214
## Cumulative Proportion 0.67846 0.6808 0.68303 0.68528 0.68749 0.6897 0.69183
## PC69 PC70 PC71 PC72 PC73 PC74 PC75
## Standard deviation 4.20065 4.19632 4.16015 4.11565 4.10750 4.09884 4.0757
## Proportion of Variance 0.00212 0.00212 0.00208 0.00203 0.00203 0.00202 0.0020
## Cumulative Proportion 0.69395 0.69606 0.69814 0.70018 0.70220 0.70422 0.7062
## PC76 PC77 PC78 PC79 PC80 PC81 PC82
## Standard deviation 4.06148 4.03849 4.01996 3.99225 3.95753 3.93899 3.92053
## Proportion of Variance 0.00198 0.00196 0.00194 0.00191 0.00188 0.00186 0.00185
## Cumulative Proportion 0.70820 0.71016 0.71210 0.71402 0.71590 0.71776 0.71961
## PC83 PC84 PC85 PC86 PC87 PC88 PC89
## Standard deviation 3.89028 3.8750 3.8697 3.85486 3.85328 3.83204 3.81567
## Proportion of Variance 0.00182 0.0018 0.0018 0.00179 0.00178 0.00176 0.00175
## Cumulative Proportion 0.72142 0.7232 0.7250 0.72681 0.72860 0.73036 0.73211
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## Standard deviation 3.78876 3.78517 3.7606 3.7569 3.72901 3.71584 3.70743
## Proportion of Variance 0.00172 0.00172 0.0017 0.0017 0.00167 0.00166 0.00165
## Cumulative Proportion 0.73383 0.73555 0.7372 0.7389 0.74062 0.74228 0.74393
## PC97 PC98 PC99 PC100 PC101 PC102 PC103
## Standard deviation 3.70552 3.67585 3.66773 3.66004 3.64324 3.63664 3.61695
## Proportion of Variance 0.00165 0.00162 0.00162 0.00161 0.00159 0.00159 0.00157
## Cumulative Proportion 0.74558 0.74720 0.74882 0.75043 0.75202 0.75361 0.75518
## PC104 PC105 PC106 PC107 PC108 PC109 PC110
## Standard deviation 3.59102 3.58566 3.56702 3.55408 3.54025 3.5280 3.52453
## Proportion of Variance 0.00155 0.00154 0.00153 0.00152 0.00151 0.0015 0.00149
## Cumulative Proportion 0.75673 0.75828 0.75980 0.76132 0.76283 0.7643 0.76581
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## Standard deviation 3.50914 3.49500 3.47517 3.46063 3.45819 3.44449 3.44035
## Proportion of Variance 0.00148 0.00147 0.00145 0.00144 0.00144 0.00143 0.00142
## Cumulative Proportion 0.76729 0.76876 0.77021 0.77165 0.77309 0.77451 0.77593
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## Standard deviation 3.43398 3.42124 3.4163 3.40766 3.40026 3.39654 3.37905
## Proportion of Variance 0.00142 0.00141 0.0014 0.00139 0.00139 0.00139 0.00137
## Cumulative Proportion 0.77735 0.77876 0.7802 0.78155 0.78294 0.78433 0.78570
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## Standard deviation 3.37470 3.36001 3.35779 3.34735 3.34317 3.33246 3.32917
## Proportion of Variance 0.00137 0.00136 0.00135 0.00135 0.00134 0.00133 0.00133
## Cumulative Proportion 0.78707 0.78842 0.78978 0.79112 0.79247 0.79380 0.79513
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## Standard deviation 3.31455 3.30845 3.2928 3.2904 3.2836 3.27397 3.26140
## Proportion of Variance 0.00132 0.00131 0.0013 0.0013 0.0013 0.00129 0.00128
## Cumulative Proportion 0.79645 0.79777 0.7991 0.8004 0.8017 0.80295 0.80423
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## Standard deviation 3.25339 3.25126 3.24309 3.23593 3.23245 3.21763 3.21064
## Proportion of Variance 0.00127 0.00127 0.00126 0.00126 0.00126 0.00124 0.00124
## Cumulative Proportion 0.80550 0.80677 0.80804 0.80929 0.81055 0.81179 0.81303
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## Standard deviation 3.20309 3.19907 3.18999 3.18243 3.18105 3.16820 3.1634
## Proportion of Variance 0.00123 0.00123 0.00122 0.00122 0.00122 0.00121 0.0012
## Cumulative Proportion 0.81426 0.81549 0.81672 0.81793 0.81915 0.82035 0.8216
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## Standard deviation 3.1606 3.1558 3.14554 3.14009 3.13754 3.13344 3.12557
## Proportion of Variance 0.0012 0.0012 0.00119 0.00118 0.00118 0.00118 0.00117
## Cumulative Proportion 0.8228 0.8239 0.82514 0.82632 0.82751 0.82869 0.82986
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## Standard deviation 3.11755 3.11395 3.10581 3.09356 3.08938 3.08305 3.07873
## Proportion of Variance 0.00117 0.00116 0.00116 0.00115 0.00115 0.00114 0.00114
## Cumulative Proportion 0.83103 0.83219 0.83335 0.83450 0.83565 0.83679 0.83793
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## Standard deviation 3.07462 3.06430 3.06022 3.05670 3.05033 3.04684 3.04265
## Proportion of Variance 0.00114 0.00113 0.00112 0.00112 0.00112 0.00112 0.00111
## Cumulative Proportion 0.83906 0.84019 0.84132 0.84244 0.84356 0.84467 0.84578
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## Standard deviation 3.03959 3.0319 3.0270 3.01503 3.01049 3.00594 2.99494
## Proportion of Variance 0.00111 0.0011 0.0011 0.00109 0.00109 0.00109 0.00108
## Cumulative Proportion 0.84689 0.8480 0.8491 0.85019 0.85128 0.85237 0.85344
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## Standard deviation 2.99439 2.99037 2.98379 2.97946 2.97711 2.97043 2.96251
## Proportion of Variance 0.00108 0.00107 0.00107 0.00107 0.00106 0.00106 0.00105
## Cumulative Proportion 0.85452 0.85559 0.85666 0.85773 0.85879 0.85985 0.86091
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## Standard deviation 2.95961 2.95624 2.95141 2.94277 2.94109 2.93892 2.92771
## Proportion of Variance 0.00105 0.00105 0.00105 0.00104 0.00104 0.00104 0.00103
## Cumulative Proportion 0.86196 0.86301 0.86406 0.86510 0.86614 0.86717 0.86820
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## Standard deviation 2.92429 2.92160 2.91835 2.91460 2.90932 2.90439 2.90132
## Proportion of Variance 0.00103 0.00103 0.00102 0.00102 0.00102 0.00101 0.00101
## Cumulative Proportion 0.86923 0.87026 0.87128 0.87230 0.87332 0.87433 0.87534
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## Standard deviation 2.89606 2.89341 2.8871 2.8840 2.87691 2.87161 2.86633
## Proportion of Variance 0.00101 0.00101 0.0010 0.0010 0.00099 0.00099 0.00099
## Cumulative Proportion 0.87635 0.87735 0.8784 0.8794 0.88035 0.88134 0.88233
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## Standard deviation 2.86406 2.85968 2.85100 2.84725 2.84085 2.83965 2.83553
## Proportion of Variance 0.00099 0.00098 0.00098 0.00097 0.00097 0.00097 0.00097
## Cumulative Proportion 0.88331 0.88429 0.88527 0.88624 0.88721 0.88818 0.88915
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## Standard deviation 2.83486 2.82591 2.82280 2.82101 2.81837 2.81104 2.80575
## Proportion of Variance 0.00097 0.00096 0.00096 0.00096 0.00095 0.00095 0.00095
## Cumulative Proportion 0.89011 0.89107 0.89203 0.89299 0.89394 0.89489 0.89584
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## Standard deviation 2.80220 2.80051 2.79801 2.79246 2.79073 2.78776 2.78263
## Proportion of Variance 0.00094 0.00094 0.00094 0.00094 0.00094 0.00093 0.00093
## Cumulative Proportion 0.89678 0.89772 0.89866 0.89960 0.90053 0.90147 0.90240
## PC230 PC231 PC232 PC233 PC234 PC235 PC236
## Standard deviation 2.77758 2.77580 2.77156 2.76483 2.76372 2.75992 2.75661
## Proportion of Variance 0.00093 0.00093 0.00092 0.00092 0.00092 0.00092 0.00091
## Cumulative Proportion 0.90332 0.90425 0.90517 0.90609 0.90701 0.90792 0.90884
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## Standard deviation 2.75409 2.75153 2.74683 2.7435 2.7381 2.7352 2.72938
## Proportion of Variance 0.00091 0.00091 0.00091 0.0009 0.0009 0.0009 0.00089
## Cumulative Proportion 0.90975 0.91066 0.91156 0.9125 0.9134 0.9143 0.91516
## PC244 PC245 PC246 PC247 PC248 PC249 PC250
## Standard deviation 2.72086 2.71925 2.71426 2.71126 2.70440 2.70197 2.69713
## Proportion of Variance 0.00089 0.00089 0.00089 0.00088 0.00088 0.00088 0.00087
## Cumulative Proportion 0.91605 0.91694 0.91782 0.91871 0.91959 0.92046 0.92134
## PC251 PC252 PC253 PC254 PC255 PC256 PC257
## Standard deviation 2.69472 2.69004 2.68899 2.68627 2.68273 2.67442 2.67177
## Proportion of Variance 0.00087 0.00087 0.00087 0.00087 0.00086 0.00086 0.00086
## Cumulative Proportion 0.92221 0.92308 0.92395 0.92481 0.92568 0.92654 0.92740
## PC258 PC259 PC260 PC261 PC262 PC263 PC264
## Standard deviation 2.66873 2.66397 2.66311 2.65899 2.65795 2.65231 2.64553
## Proportion of Variance 0.00086 0.00085 0.00085 0.00085 0.00085 0.00085 0.00084
## Cumulative Proportion 0.92825 0.92910 0.92996 0.93080 0.93165 0.93250 0.93334
## PC265 PC266 PC267 PC268 PC269 PC270 PC271
## Standard deviation 2.64220 2.64058 2.63780 2.62962 2.62802 2.62607 2.62027
## Proportion of Variance 0.00084 0.00084 0.00084 0.00083 0.00083 0.00083 0.00082
## Cumulative Proportion 0.93418 0.93502 0.93585 0.93668 0.93751 0.93834 0.93916
## PC272 PC273 PC274 PC275 PC276 PC277 PC278
## Standard deviation 2.61739 2.61076 2.60683 2.60438 2.60217 2.60122 2.59551
## Proportion of Variance 0.00082 0.00082 0.00082 0.00081 0.00081 0.00081 0.00081
## Cumulative Proportion 0.93999 0.94081 0.94162 0.94244 0.94325 0.94406 0.94487
## PC279 PC280 PC281 PC282 PC283 PC284 PC285
## Standard deviation 2.59037 2.5859 2.5807 2.5793 2.57187 2.56755 2.56524
## Proportion of Variance 0.00081 0.0008 0.0008 0.0008 0.00079 0.00079 0.00079
## Cumulative Proportion 0.94568 0.9465 0.9473 0.9481 0.94888 0.94967 0.95046
## PC286 PC287 PC288 PC289 PC290 PC291 PC292
## Standard deviation 2.56359 2.55918 2.55455 2.55357 2.55222 2.54862 2.54228
## Proportion of Variance 0.00079 0.00079 0.00078 0.00078 0.00078 0.00078 0.00078
## Cumulative Proportion 0.95125 0.95204 0.95282 0.95360 0.95438 0.95517 0.95594
## PC293 PC294 PC295 PC296 PC297 PC298 PC299
## Standard deviation 2.53621 2.53426 2.52963 2.52623 2.52338 2.51839 2.51624
## Proportion of Variance 0.00077 0.00077 0.00077 0.00077 0.00076 0.00076 0.00076
## Cumulative Proportion 0.95671 0.95749 0.95825 0.95902 0.95979 0.96055 0.96131
## PC300 PC301 PC302 PC303 PC304 PC305 PC306
## Standard deviation 2.50972 2.50680 2.50170 2.49927 2.49726 2.49308 2.49225
## Proportion of Variance 0.00076 0.00075 0.00075 0.00075 0.00075 0.00075 0.00075
## Cumulative Proportion 0.96207 0.96282 0.96357 0.96432 0.96507 0.96582 0.96656
## PC307 PC308 PC309 PC310 PC311 PC312 PC313
## Standard deviation 2.47994 2.47630 2.47078 2.46630 2.45976 2.45874 2.45698
## Proportion of Variance 0.00074 0.00074 0.00073 0.00073 0.00073 0.00073 0.00073
## Cumulative Proportion 0.96730 0.96804 0.96877 0.96950 0.97023 0.97096 0.97168
## PC314 PC315 PC316 PC317 PC318 PC319 PC320
## Standard deviation 2.45028 2.44527 2.44052 2.43661 2.42623 2.42522 2.4223
## Proportion of Variance 0.00072 0.00072 0.00072 0.00071 0.00071 0.00071 0.0007
## Cumulative Proportion 0.97240 0.97312 0.97384 0.97455 0.97526 0.97596 0.9767
## PC321 PC322 PC323 PC324 PC325 PC326 PC327
## Standard deviation 2.4167 2.4080 2.4075 2.39650 2.39152 2.39005 2.38893
## Proportion of Variance 0.0007 0.0007 0.0007 0.00069 0.00069 0.00069 0.00069
## Cumulative Proportion 0.9774 0.9781 0.9788 0.97945 0.98014 0.98083 0.98151
## PC328 PC329 PC330 PC331 PC332 PC333 PC334
## Standard deviation 2.38257 2.37905 2.37524 2.37151 2.36535 2.35834 2.35584
## Proportion of Variance 0.00068 0.00068 0.00068 0.00068 0.00067 0.00067 0.00067
## Cumulative Proportion 0.98219 0.98287 0.98355 0.98423 0.98490 0.98557 0.98623
## PC335 PC336 PC337 PC338 PC339 PC340 PC341
## Standard deviation 2.34905 2.34702 2.34303 2.33635 2.32320 2.31386 2.31037
## Proportion of Variance 0.00066 0.00066 0.00066 0.00066 0.00065 0.00064 0.00064
## Cumulative Proportion 0.98690 0.98756 0.98822 0.98887 0.98952 0.99016 0.99081
## PC342 PC343 PC344 PC345 PC346 PC347 PC348
## Standard deviation 2.29992 2.29480 2.28931 2.28136 2.26866 2.24575 2.2294
## Proportion of Variance 0.00064 0.00063 0.00063 0.00063 0.00062 0.00061 0.0006
## Cumulative Proportion 0.99144 0.99207 0.99270 0.99333 0.99395 0.99455 0.9951
## PC349 PC350 PC351 PC352 PC353 PC354 PC355
## Standard deviation 2.21130 2.19308 2.16722 2.16321 2.12501 2.10110 2.09759
## Proportion of Variance 0.00059 0.00058 0.00056 0.00056 0.00054 0.00053 0.00053
## Cumulative Proportion 0.99574 0.99632 0.99688 0.99744 0.99798 0.99851 0.99904
## PC356 PC357 PC358 PC359 PC360
## Standard deviation 2.0404 1.95019 1.459e-13 1.334e-14 5.966e-16
## Proportion of Variance 0.0005 0.00046 0.000e+00 0.000e+00 0.000e+00
## Cumulative Proportion 0.9995 1.00000 1.000e+00 1.000e+00 1.000e+00
# PC1 vs PC2 of the GTEx pancreas PCA, coloured by the SEX metadata field:
# samples with SEX == "2" are red (legend entry "female"), all others black.
sex <- pan_gtex_metadata_order$external_id[
  pan_gtex_metadata_order$SEX == "2"
]
# Rows with a missing SEX yield NA ids from the comparison above; drop them.
sex <- sex[!is.na(sex)]
tumor_norm <- ifelse(
  pan_gtex_metadata_order$external_id %in% sex,
  "red",
  "black"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)",
  ylab = "PC2 (7.17%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("female", "male"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# PC1 vs PC2 of the GTEx pancreas PCA, highlighting the AGE == "70-79" group
# in red; every other sample (including those with a missing AGE) is black.
age <- pan_gtex_metadata_order$external_id[
  pan_gtex_metadata_order$AGE == "70-79"
]
# Rows with a missing AGE yield NA ids from the comparison above; drop them.
age <- age[!is.na(age)]
tumor_norm <- ifelse(
  pan_gtex_metadata_order$external_id %in% age,
  "red",
  "black"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)",
  ylab = "PC2 (7.17%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("70-79", "! 70-79"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# PC1 vs PC2 of the GTEx pancreas PCA, highlighting the AGE == "20-29" group
# in red; every other sample (including those with a missing AGE) is black.
age <- pan_gtex_metadata_order$external_id[
  pan_gtex_metadata_order$AGE == "20-29"
]
# Rows with a missing AGE yield NA ids from the comparison above; drop them.
age <- age[!is.na(age)]
# Earlier approach, kept for reference:
#normal_ids<- rownames(recount3_rse_PANCREAS@colData)[rownames(recount3_rse_PANCREAS@colData) %in% sex]
tumor_norm <- ifelse(
  pan_gtex_metadata_order$external_id %in% age,
  "red",
  "black"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)",
  ylab = "PC2 (7.17%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
looking at RIN scores
# PC1 vs PC2 of the GTEx pancreas PCA, coloured by the SMRIN field: samples
# with SMRIN >= 7 are red, the rest black.
RIN <- pan_gtex_metadata_order$external_id[
  pan_gtex_metadata_order$SMRIN >= 7
]
# Rows with a missing SMRIN yield NA ids from the comparison above; drop them.
# Those samples therefore fall into the black group shown as "< 7".
RIN <- RIN[!is.na(RIN)]
tumor_norm <- ifelse(
  pan_gtex_metadata_order$external_id %in% RIN,
  "red",
  "black"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)",
  ylab = "PC2 (7.17%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
SMTSISCH - indicates the ischemic time in minutes
# PC1 vs PC2 of the GTEx pancreas PCA, coloured by the SMTSISCH field: samples
# with SMTSISCH >= 500 are red, the rest black.
time <- pan_gtex_metadata_order$external_id[
  pan_gtex_metadata_order$SMTSISCH >= 500
]
# Rows with a missing SMTSISCH yield NA ids from the comparison above; drop
# them. Those samples therefore fall into the black group shown as "< 500".
time <- time[!is.na(time)]
tumor_norm <- ifelse(
  pan_gtex_metadata_order$external_id %in% time,
  "red",
  "black"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)",
  ylab = "PC2 (7.17%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 500", "< 500"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
PAAD
# Variance-stabilise the ordered PAAD TCGA counts and run a PCA on the samples.
# NOTE: this overwrites `vst_table`, `vst_table_df` and `pca.tumor` from the
# GTEx pancreas analysis.
vst_table <- vst(as.matrix(paad_tcga_counts_order))
# Transpose so that samples become rows, which is what prcomp() treats as
# observations.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)
# Print standard deviation and proportion of variance for every component.
summary(pca.tumor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 63.542 58.4378 45.33777 40.66603 35.36355 33.69370
## Proportion of Variance 0.127 0.1074 0.06466 0.05202 0.03934 0.03571
## Cumulative Proportion 0.127 0.2344 0.29912 0.35114 0.39048 0.42620
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 29.60559 25.2759 22.43073 21.44785 20.74062 19.41519
## Proportion of Variance 0.02757 0.0201 0.01583 0.01447 0.01353 0.01186
## Cumulative Proportion 0.45377 0.4739 0.48970 0.50417 0.51770 0.52956
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 19.17076 18.17359 18.06052 17.78557 17.1918 16.27709
## Proportion of Variance 0.01156 0.01039 0.01026 0.00995 0.0093 0.00833
## Cumulative Proportion 0.54112 0.55151 0.56177 0.57172 0.5810 0.58936
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 16.01044 15.87386 15.59041 15.34758 14.77060 14.60923
## Proportion of Variance 0.00806 0.00793 0.00765 0.00741 0.00686 0.00671
## Cumulative Proportion 0.59742 0.60535 0.61299 0.62040 0.62727 0.63398
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 14.46102 14.19910 13.9263 13.76954 13.48942 13.20125
## Proportion of Variance 0.00658 0.00634 0.0061 0.00596 0.00572 0.00548
## Cumulative Proportion 0.64056 0.64690 0.6530 0.65897 0.66469 0.67018
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 13.03449 12.83102 12.72018 12.67710 12.43568 12.2243
## Proportion of Variance 0.00534 0.00518 0.00509 0.00506 0.00487 0.0047
## Cumulative Proportion 0.67552 0.68070 0.68579 0.69085 0.69571 0.7004
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 11.9649 11.73132 11.57543 11.48083 11.46756 11.40348
## Proportion of Variance 0.0045 0.00433 0.00422 0.00415 0.00414 0.00409
## Cumulative Proportion 0.7049 0.70924 0.71346 0.71761 0.72174 0.72583
## PC43 PC44 PC45 PC46 PC47 PC48
## Standard deviation 11.23405 11.20859 11.1385 10.8415 10.82423 10.6965
## Proportion of Variance 0.00397 0.00395 0.0039 0.0037 0.00369 0.0036
## Cumulative Proportion 0.72980 0.73376 0.7377 0.7414 0.74504 0.7486
## PC49 PC50 PC51 PC52 PC53 PC54
## Standard deviation 10.66931 10.51512 10.43732 10.37509 10.29633 10.20617
## Proportion of Variance 0.00358 0.00348 0.00343 0.00339 0.00334 0.00328
## Cumulative Proportion 0.75222 0.75570 0.75913 0.76252 0.76585 0.76913
## PC55 PC56 PC57 PC58 PC59 PC60 PC61
## Standard deviation 10.14409 10.0894 9.95004 9.93803 9.81732 9.74572 9.71099
## Proportion of Variance 0.00324 0.0032 0.00311 0.00311 0.00303 0.00299 0.00297
## Cumulative Proportion 0.77237 0.7756 0.77868 0.78179 0.78482 0.78781 0.79078
## PC62 PC63 PC64 PC65 PC66 PC67 PC68
## Standard deviation 9.66454 9.62211 9.55169 9.46667 9.40055 9.39645 9.2606
## Proportion of Variance 0.00294 0.00291 0.00287 0.00282 0.00278 0.00278 0.0027
## Cumulative Proportion 0.79371 0.79663 0.79950 0.80232 0.80510 0.80787 0.8106
## PC69 PC70 PC71 PC72 PC73 PC74 PC75
## Standard deviation 9.15686 9.12400 9.0973 9.02846 8.93559 8.9082 8.87756
## Proportion of Variance 0.00264 0.00262 0.0026 0.00256 0.00251 0.0025 0.00248
## Cumulative Proportion 0.81321 0.81583 0.8184 0.82100 0.82351 0.8260 0.82848
## PC76 PC77 PC78 PC79 PC80 PC81 PC82
## Standard deviation 8.85408 8.80841 8.78689 8.7356 8.65563 8.58392 8.57598
## Proportion of Variance 0.00247 0.00244 0.00243 0.0024 0.00236 0.00232 0.00231
## Cumulative Proportion 0.83095 0.83339 0.83582 0.8382 0.84058 0.84290 0.84521
## PC83 PC84 PC85 PC86 PC87 PC88 PC89
## Standard deviation 8.5430 8.43957 8.40618 8.39298 8.37405 8.32031 8.31543
## Proportion of Variance 0.0023 0.00224 0.00222 0.00222 0.00221 0.00218 0.00218
## Cumulative Proportion 0.8475 0.84975 0.85197 0.85418 0.85639 0.85857 0.86074
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## Standard deviation 8.23455 8.19348 8.1782 8.09924 8.08106 8.00326 7.99813
## Proportion of Variance 0.00213 0.00211 0.0021 0.00206 0.00205 0.00202 0.00201
## Cumulative Proportion 0.86288 0.86499 0.8671 0.86916 0.87121 0.87323 0.87524
## PC97 PC98 PC99 PC100 PC101 PC102 PC103
## Standard deviation 7.95493 7.92699 7.91154 7.87776 7.83079 7.81440 7.74768
## Proportion of Variance 0.00199 0.00198 0.00197 0.00195 0.00193 0.00192 0.00189
## Cumulative Proportion 0.87723 0.87921 0.88118 0.88313 0.88506 0.88698 0.88887
## PC104 PC105 PC106 PC107 PC108 PC109 PC110
## Standard deviation 7.73254 7.68775 7.67425 7.65054 7.62311 7.58639 7.54246
## Proportion of Variance 0.00188 0.00186 0.00185 0.00184 0.00183 0.00181 0.00179
## Cumulative Proportion 0.89075 0.89261 0.89446 0.89630 0.89813 0.89994 0.90173
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## Standard deviation 7.49990 7.46850 7.44663 7.42236 7.40376 7.38471 7.3515
## Proportion of Variance 0.00177 0.00175 0.00174 0.00173 0.00172 0.00172 0.0017
## Cumulative Proportion 0.90350 0.90525 0.90700 0.90873 0.91046 0.91217 0.9139
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## Standard deviation 7.32355 7.27552 7.24639 7.20976 7.18682 7.16815 7.15625
## Proportion of Variance 0.00169 0.00167 0.00165 0.00164 0.00162 0.00162 0.00161
## Cumulative Proportion 0.91556 0.91722 0.91888 0.92051 0.92214 0.92375 0.92536
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## Standard deviation 7.1277 7.10048 7.09176 7.06845 7.03026 6.99342 6.97223
## Proportion of Variance 0.0016 0.00159 0.00158 0.00157 0.00155 0.00154 0.00153
## Cumulative Proportion 0.9270 0.92855 0.93013 0.93170 0.93326 0.93479 0.93632
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## Standard deviation 6.96033 6.92977 6.88904 6.86298 6.83919 6.82806 6.80890
## Proportion of Variance 0.00152 0.00151 0.00149 0.00148 0.00147 0.00147 0.00146
## Cumulative Proportion 0.93785 0.93936 0.94085 0.94233 0.94381 0.94527 0.94673
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## Standard deviation 6.77021 6.75057 6.72850 6.70192 6.6795 6.6734 6.63239
## Proportion of Variance 0.00144 0.00143 0.00142 0.00141 0.0014 0.0014 0.00138
## Cumulative Proportion 0.94817 0.94961 0.95103 0.95244 0.9538 0.9553 0.95663
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## Standard deviation 6.59626 6.56552 6.55772 6.54689 6.53551 6.50186 6.48074
## Proportion of Variance 0.00137 0.00136 0.00135 0.00135 0.00134 0.00133 0.00132
## Cumulative Proportion 0.95800 0.95936 0.96071 0.96206 0.96340 0.96473 0.96605
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## Standard deviation 6.46046 6.41174 6.37304 6.35262 6.34031 6.33220 6.28801
## Proportion of Variance 0.00131 0.00129 0.00128 0.00127 0.00126 0.00126 0.00124
## Cumulative Proportion 0.96737 0.96866 0.96994 0.97121 0.97247 0.97373 0.97498
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## Standard deviation 6.26874 6.23603 6.21546 6.1750 6.14416 6.10423 6.07537
## Proportion of Variance 0.00124 0.00122 0.00122 0.0012 0.00119 0.00117 0.00116
## Cumulative Proportion 0.97621 0.97744 0.97865 0.9798 0.98104 0.98221 0.98337
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## Standard deviation 6.03681 5.99665 5.98718 5.96258 5.93709 5.83782 5.83298
## Proportion of Variance 0.00115 0.00113 0.00113 0.00112 0.00111 0.00107 0.00107
## Cumulative Proportion 0.98452 0.98565 0.98678 0.98790 0.98900 0.99008 0.99115
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## Standard deviation 5.80251 5.77446 5.72864 5.67400 5.66476 5.50592 5.46563
## Proportion of Variance 0.00106 0.00105 0.00103 0.00101 0.00101 0.00095 0.00094
## Cumulative Proportion 0.99221 0.99325 0.99429 0.99530 0.99631 0.99726 0.99820
## PC181 PC182 PC183
## Standard deviation 5.41786 5.26986 1.531e-13
## Proportion of Variance 0.00092 0.00087 0.000e+00
## Cumulative Proportion 0.99913 1.00000 1.000e+00
# PC1 vs PC2 of the PAAD PCA, coloured by sample type: samples annotated as
# "Solid Tissue Normal" are black, every other sample is red ("tumor").
nt <- paad_tcga_metadata_order$external_id[
  paad_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
]
tumor_norm <- ifelse(
  paad_tcga_metadata_order$external_id %in% nt,
  "black",
  "red"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (12.7%)",
  ylab = "PC2 (10.74%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# Pull the metadata of the samples sitting far out on PC1 (score > 150) for
# manual inspection. This relies on the metadata rows being in the same order
# as the PCA rows.
test <- as.data.frame(
  paad_tcga_metadata_order[pca.tumor$x[, "PC1"] > 150, ]
)

# Ids of samples whose "other histological diagnosis" field mentions
# "neuroendocrine" (case-insensitive).
nt <- paad_tcga_metadata_order$external_id[
  grep(
    "NEUROENDOCRINE",
    paad_tcga_metadata_order$cgc_case_other_histological_diagnosis,
    ignore.case = TRUE
  )
]
# Earlier approach, kept for reference:
#normal_ids<- rownames(recount3_rse_PAAD@colData)[rownames(recount3_rse_PAAD@colData) %in% nt]

# Same PC1 vs PC2 projection, now with the neuroendocrine samples in black
# and all other samples in red.
tumor_norm <- ifelse(
  paad_tcga_metadata_order$external_id %in% nt,
  "black",
  "red"
)
plot(
  x = pca.tumor$x[, "PC1"],
  y = pca.tumor$x[, "PC2"],
  pch = 20,
  col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (12.7%)",
  ylab = "PC2 (10.74%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("other", "NEUROENDOCRINE tumors"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
remove neuroendocrine tumors
# Drop the neuroendocrine tumors and redo the PCA on the remaining samples.
# Barcodes (not external_ids) are collected here because the columns of
# `vst_table` are matched against them below.
nt <- paad_tcga_metadata_order$tcga_barcode[
  grep(
    "NEUROENDOCRINE",
    paad_tcga_metadata_order$cgc_case_other_histological_diagnosis,
    ignore.case = TRUE
  )
]
# Subset the metadata by ROW (note the comma). With a single index a data
# frame treats the logical vector as a column selector, which would keep every
# sample and leave the metadata out of step with the filtered expression table.
metadata <- as.data.frame(paad_tcga_metadata_order)[
  !paad_tcga_metadata_order$tcga_barcode %in% nt,
]
# drop = FALSE keeps a matrix even if only a single sample were to remain.
vst_table_v2 <- vst_table[, !colnames(vst_table) %in% nt, drop = FALSE]
# prcomp() expects samples in rows, so transpose the genes x samples table.
vst_table_df <- t(vst_table_v2)
pca.tumor <- prcomp(vst_table_df)
summary(pca.tumor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 62.4031 46.88721 41.53646 36.47094 35.14795 30.09497
## Proportion of Variance 0.1367 0.07716 0.06056 0.04669 0.04336 0.03179
## Cumulative Proportion 0.1367 0.21384 0.27440 0.32108 0.36445 0.39623
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 28.97007 25.23217 22.4547 21.65683 20.47935 18.78061
## Proportion of Variance 0.02946 0.02235 0.0177 0.01646 0.01472 0.01238
## Cumulative Proportion 0.42569 0.44804 0.4657 0.48220 0.49692 0.50930
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 18.68885 18.19049 17.63742 16.58814 16.40326 15.88163
## Proportion of Variance 0.01226 0.01161 0.01092 0.00966 0.00944 0.00885
## Cumulative Proportion 0.52156 0.53317 0.54409 0.55375 0.56319 0.57205
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 15.58815 15.08570 14.94203 14.62833 14.2210 14.17681
## Proportion of Variance 0.00853 0.00799 0.00784 0.00751 0.0071 0.00705
## Cumulative Proportion 0.58057 0.58856 0.59640 0.60391 0.6110 0.61806
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 13.74874 13.68578 13.43291 13.24861 13.15758 12.94523
## Proportion of Variance 0.00663 0.00657 0.00633 0.00616 0.00608 0.00588
## Cumulative Proportion 0.62470 0.63127 0.63760 0.64376 0.64984 0.65572
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 12.59040 12.29439 11.89921 11.78698 11.75442 11.6898
## Proportion of Variance 0.00556 0.00531 0.00497 0.00488 0.00485 0.0048
## Cumulative Proportion 0.66129 0.66659 0.67156 0.67644 0.68129 0.6861
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 11.56517 11.44123 11.29835 11.25786 11.12692 10.97630
## Proportion of Variance 0.00469 0.00459 0.00448 0.00445 0.00435 0.00423
## Cumulative Proportion 0.69078 0.69537 0.69985 0.70430 0.70865 0.71288
## PC43 PC44 PC45 PC46 PC47 PC48
## Standard deviation 10.90700 10.8033 10.63793 10.57471 10.50130 10.42558
## Proportion of Variance 0.00418 0.0041 0.00397 0.00392 0.00387 0.00382
## Cumulative Proportion 0.71705 0.7211 0.72512 0.72905 0.73292 0.73673
## PC49 PC50 PC51 PC52 PC53 PC54
## Standard deviation 10.31270 10.25271 10.17429 10.10076 10.07905 9.96409
## Proportion of Variance 0.00373 0.00369 0.00363 0.00358 0.00357 0.00348
## Cumulative Proportion 0.74046 0.74415 0.74779 0.75137 0.75493 0.75842
## PC55 PC56 PC57 PC58 PC59 PC60 PC61
## Standard deviation 9.93805 9.88342 9.80848 9.75474 9.67990 9.49513 9.46514
## Proportion of Variance 0.00347 0.00343 0.00338 0.00334 0.00329 0.00316 0.00314
## Cumulative Proportion 0.76188 0.76531 0.76869 0.77203 0.77532 0.77848 0.78163
## PC62 PC63 PC64 PC65 PC66 PC67 PC68
## Standard deviation 9.41030 9.35724 9.27998 9.2392 9.19203 9.16247 9.10110
## Proportion of Variance 0.00311 0.00307 0.00302 0.0030 0.00297 0.00295 0.00291
## Cumulative Proportion 0.78474 0.78781 0.79083 0.7938 0.79679 0.79974 0.80265
## PC69 PC70 PC71 PC72 PC73 PC74 PC75
## Standard deviation 9.07221 9.05062 9.00027 8.9372 8.85509 8.83956 8.7717
## Proportion of Variance 0.00289 0.00288 0.00284 0.0028 0.00275 0.00274 0.0027
## Cumulative Proportion 0.80554 0.80841 0.81125 0.8141 0.81681 0.81955 0.8223
## PC76 PC77 PC78 PC79 PC80 PC81 PC82
## Standard deviation 8.69155 8.66293 8.64709 8.58798 8.56476 8.53237 8.49246
## Proportion of Variance 0.00265 0.00263 0.00262 0.00259 0.00257 0.00256 0.00253
## Cumulative Proportion 0.82490 0.82754 0.83016 0.83275 0.83533 0.83788 0.84041
## PC83 PC84 PC85 PC86 PC87 PC88 PC89
## Standard deviation 8.40163 8.38560 8.32498 8.29403 8.21743 8.20029 8.17371
## Proportion of Variance 0.00248 0.00247 0.00243 0.00241 0.00237 0.00236 0.00234
## Cumulative Proportion 0.84289 0.84536 0.84779 0.85021 0.85258 0.85494 0.85728
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## Standard deviation 8.15159 8.0889 8.07267 8.03202 7.97668 7.95963 7.94826
## Proportion of Variance 0.00233 0.0023 0.00229 0.00226 0.00223 0.00222 0.00222
## Cumulative Proportion 0.85961 0.8619 0.86420 0.86646 0.86869 0.87092 0.87314
## PC97 PC98 PC99 PC100 PC101 PC102 PC103
## Standard deviation 7.9163 7.87493 7.84841 7.82560 7.77542 7.7298 7.68903
## Proportion of Variance 0.0022 0.00218 0.00216 0.00215 0.00212 0.0021 0.00208
## Cumulative Proportion 0.8753 0.87751 0.87967 0.88182 0.88395 0.8860 0.88812
## PC104 PC105 PC106 PC107 PC108 PC109 PC110
## Standard deviation 7.68714 7.65129 7.61265 7.60194 7.57893 7.5422 7.53265
## Proportion of Variance 0.00207 0.00205 0.00203 0.00203 0.00202 0.0020 0.00199
## Cumulative Proportion 0.89019 0.89225 0.89428 0.89631 0.89832 0.9003 0.90231
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## Standard deviation 7.46367 7.41557 7.39365 7.36910 7.3620 7.33842 7.31735
## Proportion of Variance 0.00196 0.00193 0.00192 0.00191 0.0019 0.00189 0.00188
## Cumulative Proportion 0.90427 0.90620 0.90812 0.91002 0.9119 0.91382 0.91570
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## Standard deviation 7.29276 7.25964 7.24399 7.18968 7.17512 7.14799 7.11589
## Proportion of Variance 0.00187 0.00185 0.00184 0.00181 0.00181 0.00179 0.00178
## Cumulative Proportion 0.91756 0.91941 0.92125 0.92307 0.92487 0.92667 0.92845
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## Standard deviation 7.08893 7.06877 7.05156 7.02036 6.99566 6.97352 6.93825
## Proportion of Variance 0.00176 0.00175 0.00175 0.00173 0.00172 0.00171 0.00169
## Cumulative Proportion 0.93021 0.93196 0.93371 0.93544 0.93716 0.93886 0.94055
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## Standard deviation 6.91710 6.89225 6.88361 6.85781 6.83451 6.78593 6.76803
## Proportion of Variance 0.00168 0.00167 0.00166 0.00165 0.00164 0.00162 0.00161
## Cumulative Proportion 0.94223 0.94390 0.94556 0.94721 0.94885 0.95047 0.95208
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## Standard deviation 6.73768 6.72154 6.71289 6.68377 6.68042 6.64084 6.59101
## Proportion of Variance 0.00159 0.00159 0.00158 0.00157 0.00157 0.00155 0.00152
## Cumulative Proportion 0.95367 0.95526 0.95684 0.95841 0.95997 0.96152 0.96304
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## Standard deviation 6.57244 6.5270 6.52185 6.49955 6.47523 6.45629 6.40909
## Proportion of Variance 0.00152 0.0015 0.00149 0.00148 0.00147 0.00146 0.00144
## Cumulative Proportion 0.96456 0.9661 0.96755 0.96903 0.97050 0.97197 0.97341
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## Standard deviation 6.38055 6.36164 6.3069 6.26724 6.23827 6.20937 6.15954
## Proportion of Variance 0.00143 0.00142 0.0014 0.00138 0.00137 0.00135 0.00133
## Cumulative Proportion 0.97484 0.97626 0.9777 0.97903 0.98040 0.98175 0.98308
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## Standard deviation 6.14947 6.13003 6.10119 6.00558 5.98531 5.95604 5.93735
## Proportion of Variance 0.00133 0.00132 0.00131 0.00127 0.00126 0.00125 0.00124
## Cumulative Proportion 0.98441 0.98573 0.98704 0.98830 0.98956 0.99080 0.99204
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## Standard deviation 5.90130 5.8379 5.82042 5.66063 5.62063 5.56764 5.41600
## Proportion of Variance 0.00122 0.0012 0.00119 0.00112 0.00111 0.00109 0.00103
## Cumulative Proportion 0.99326 0.9945 0.99565 0.99677 0.99788 0.99897 1.00000
## PC174
## Standard deviation 1.531e-13
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
# Colour the PCA of the filtered PAAD samples by tumor vs. normal status:
# barcodes annotated as "Solid Tissue Normal" are black, all others red.
nt <- metadata$tcga_barcode[
  metadata$cgc_sample_sample_type == "Solid Tissue Normal"
]
#normal_ids<- rownames(metadata)[rownames(metadata) %in% nt]
tumor_norm <- ifelse(
  !(metadata$tcga_barcode %in% nt),
  "red",
  "black"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (13.67%)",
  ylab = "PC2 (7.716%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# Print the names of the samples with a PC1 score below -130.
names(pca.tumor$x[, 1])[pca.tumor$x[, 1] < -130]
## [1] "TCGA-HV-A7OP-01A-11R-A33R-07" "TCGA-HZ-7289-01A-11R-2156-07"
## [3] "TCGA-FB-AAPP-01A-12R-A41B-07" "TCGA-FB-AAQ0-01A-31R-A41B-07"
# Inspect the PC1 outliers, then check whether tumor and normal samples
# separate on any of the later principal components.

# Names of the samples with a PC1 score below -130.
# NOTE(review): the variable name `list` shadows base::list for non-call uses;
# it is kept unchanged in case later code refers to it.
list <- names(pca.tumor$x[, 1])[pca.tumor$x[, 1] < -130]
# The PCA sample names are TCGA barcodes, so look the samples up through the
# tcga_barcode column rather than through the metadata row names.
metadata_test <- metadata[metadata$tcga_barcode %in% list, ]

# PC pairs coloured with the tumor/normal colours in `tumor_norm`.
# cex.axis / cex.lab are numeric magnification factors.
plot(pca.tumor$x[, 3], pca.tumor$x[, 4], pch = 20, col = tumor_norm, main = "PCA of PAAD", xlab = "PC3", ylab = "PC4", cex.axis = 1.5, cex.lab = 1.5)
legend("topleft", legend = c("tumor", "normal"), pch = 21, pt.bg = c("red", "black"), col = "black")
plot(pca.tumor$x[, 5], pca.tumor$x[, 6], pch = 20, col = tumor_norm, main = "PCA of PAAD", xlab = "PC5", ylab = "PC6", cex.axis = 1.5, cex.lab = 1.5)
legend("topleft", legend = c("tumor", "normal"), pch = 21, pt.bg = c("red", "black"), col = "black")
plot(pca.tumor$x[, 6], pca.tumor$x[, 7], pch = 20, col = tumor_norm, main = "PCA of PAAD", xlab = "PC6", ylab = "PC7", cex.axis = 1.5, cex.lab = 1.5)
legend("topleft", legend = c("tumor", "normal"), pch = 21, pt.bg = c("red", "black"), col = "black")
plot(pca.tumor$x[, 8], pca.tumor$x[, 9], pch = 20, col = tumor_norm, main = "PCA of PAAD", xlab = "PC8", ylab = "PC9", cex.axis = 1.5, cex.lab = 1.5)
legend("topleft", legend = c("tumor", "normal"), pch = 21, pt.bg = c("red", "black"), col = "black")
plot(pca.tumor$x[, 10], pca.tumor$x[, 11], pch = 20, col = tumor_norm, main = "PCA of PAAD", xlab = "PC10", ylab = "PC11", cex.axis = 1.5, cex.lab = 1.5)
legend("topleft", legend = c("tumor", "normal"), pch = 21, pt.bg = c("red", "black"), col = "black")

# Metadata rows of the samples in `nt` (matched on barcode), for a manual
# check of their labels.
metadata_test <- metadata[metadata$tcga_barcode %in% nt, ]
The samples are correctly labeled.
Note that PAAD tumors contain a large number of cancer-associated fibroblasts, which is why the tumor and normal samples don’t separate.
Liver cancer
import the liver data
# Load the GTEx liver counts and metadata, put both into the same sample
# order, and save the ordered copies.
liver_gtex_counts <- readRDS(
  "~/data/recount3/recount3_fix_download/liver_gtex_counts.rds"
)
liver_gtex_metadata <- readRDS(
  "~/data/recount3/recount3_fix_download/liver_gtex_metadata.rds"
)
# The first five IDs on each side show that the separators differ
# ("." in the count column names, "-" in the metadata).
colnames(liver_gtex_counts)[1:5]
## [1] "GTEX.13PVQ.1526.SM.5IFEQ.1" "GTEX.UTHO.2426.SM.4JBHD.1"
## [3] "GTEX.13NZB.0626.SM.5IFH6.1" "GTEX.ZPU1.0826.SM.57WG2.1"
## [5] "GTEX.XBEC.1526.SM.4AT68.1"
liver_gtex_metadata$external_id[1:5]
## [1] "GTEX-1192X-1026-SM-5H12P.1" "GTEX-11DXY-0526-SM-5EGGQ.1"
## [3] "GTEX-11DXZ-0126-SM-5EGGY.1" "GTEX-11EQ9-0526-SM-5A5JZ.1"
## [5] "GTEX-11GSP-0626-SM-5986T.1"
# Replace every punctuation character with "-" on both sides so the IDs are
# comparable, then confirm that the two sorted ID sets are identical.
meta_data_ids <- str_replace_all(
  liver_gtex_metadata$external_id,
  "[[:punct:]]",
  "-"
)
ids <- str_replace_all(
  colnames(liver_gtex_counts),
  "[[:punct:]]",
  "-"
)
identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
## [1] TRUE
# Sort metadata rows and count columns by their normalised IDs, then label the
# count columns with the metadata external_id.
liver_gtex_metadata_order <- liver_gtex_metadata[order(meta_data_ids), ]
liver_gtex_counts_order <- liver_gtex_counts[, order(ids)]
colnames(liver_gtex_counts_order) <- liver_gtex_metadata_order$external_id
recount3_count_liver <- as.data.frame(liver_gtex_counts_order)
saveRDS(
  recount3_count_liver,
  "/home/rstudio/data/liver_gtex_count_ordered.rds"
)
saveRDS(
  liver_gtex_metadata_order,
  "/home/rstudio/data/liver_gtex_metadata_ordered.rds"
)
library(DESeq2)
#colData(recount3_rse_PANCREAS)
#counts_liver[is.na(counts_liver)] <- 0
# One sample has very high counts; leave it out before the variance
# stabilizing transformation.
counts_liver <- recount3_count_liver[
  ,
  colnames(recount3_count_liver) != "GTEX-WK11-1326-SM-4OOSI.1"
]
vst_table <- vst(as.matrix(counts_liver))
# PCA with samples in rows (transpose of the vst table).
pca.tumor <- prcomp(t(vst_table))
summary(pca.tumor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 52.5967 35.94987 30.28704 23.8062 21.27946 18.65812
## Proportion of Variance 0.1909 0.08916 0.06329 0.0391 0.03124 0.02402
## Cumulative Proportion 0.1909 0.28003 0.34331 0.3824 0.41365 0.43767
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 16.86723 16.45599 15.37530 14.47524 14.24092 12.92163
## Proportion of Variance 0.01963 0.01868 0.01631 0.01446 0.01399 0.01152
## Cumulative Proportion 0.45730 0.47598 0.49229 0.50675 0.52074 0.53226
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 12.52814 12.4530 11.97517 11.64336 11.3602 10.54322
## Proportion of Variance 0.01083 0.0107 0.00989 0.00935 0.0089 0.00767
## Cumulative Proportion 0.54309 0.5538 0.56368 0.57303 0.5819 0.58961
## PC19 PC20 PC21 PC22 PC23 PC24 PC25
## Standard deviation 10.37654 10.2128 9.76497 9.67642 9.50243 9.08517 8.93310
## Proportion of Variance 0.00743 0.0072 0.00658 0.00646 0.00623 0.00569 0.00551
## Cumulative Proportion 0.59704 0.6042 0.61081 0.61727 0.62350 0.62919 0.63470
## PC26 PC27 PC28 PC29 PC30 PC31 PC32
## Standard deviation 8.80302 8.55262 8.38273 8.21609 7.99194 7.94219 7.88825
## Proportion of Variance 0.00535 0.00505 0.00485 0.00466 0.00441 0.00435 0.00429
## Cumulative Proportion 0.64005 0.64509 0.64994 0.65460 0.65900 0.66336 0.66765
## PC33 PC34 PC35 PC36 PC37 PC38 PC39
## Standard deviation 7.7988 7.7079 7.46246 7.34232 7.3271 7.24060 7.21482
## Proportion of Variance 0.0042 0.0041 0.00384 0.00372 0.0037 0.00362 0.00359
## Cumulative Proportion 0.6718 0.6759 0.67979 0.68351 0.6872 0.69083 0.69442
## PC40 PC41 PC42 PC43 PC44 PC45 PC46
## Standard deviation 7.09779 7.04855 6.90736 6.87666 6.82741 6.69147 6.66544
## Proportion of Variance 0.00348 0.00343 0.00329 0.00326 0.00322 0.00309 0.00307
## Cumulative Proportion 0.69789 0.70132 0.70461 0.70788 0.71109 0.71418 0.71725
## PC47 PC48 PC49 PC50 PC51 PC52 PC53
## Standard deviation 6.64234 6.56392 6.51980 6.46102 6.41040 6.27349 6.2539
## Proportion of Variance 0.00304 0.00297 0.00293 0.00288 0.00284 0.00272 0.0027
## Cumulative Proportion 0.72029 0.72326 0.72620 0.72908 0.73191 0.73463 0.7373
## PC54 PC55 PC56 PC57 PC58 PC59 PC60
## Standard deviation 6.19696 6.18021 6.10989 6.09423 5.96869 5.95260 5.90809
## Proportion of Variance 0.00265 0.00264 0.00258 0.00256 0.00246 0.00244 0.00241
## Cumulative Proportion 0.73997 0.74261 0.74518 0.74775 0.75020 0.75265 0.75506
## PC61 PC62 PC63 PC64 PC65 PC66 PC67
## Standard deviation 5.86654 5.81341 5.73502 5.71785 5.68002 5.63381 5.58592
## Proportion of Variance 0.00237 0.00233 0.00227 0.00226 0.00223 0.00219 0.00215
## Cumulative Proportion 0.75743 0.75976 0.76203 0.76429 0.76651 0.76870 0.77086
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## Standard deviation 5.55208 5.55071 5.5175 5.48626 5.42060 5.39719 5.3862
## Proportion of Variance 0.00213 0.00213 0.0021 0.00208 0.00203 0.00201 0.0020
## Cumulative Proportion 0.77298 0.77511 0.7772 0.77929 0.78131 0.78332 0.7853
## PC75 PC76 PC77 PC78 PC79 PC80 PC81
## Standard deviation 5.34500 5.32935 5.30261 5.26732 5.26020 5.23354 5.21590
## Proportion of Variance 0.00197 0.00196 0.00194 0.00191 0.00191 0.00189 0.00188
## Cumulative Proportion 0.78730 0.78925 0.79119 0.79311 0.79502 0.79691 0.79878
## PC82 PC83 PC84 PC85 PC86 PC87 PC88
## Standard deviation 5.19696 5.17214 5.15230 5.1146 5.1073 5.08535 5.05372
## Proportion of Variance 0.00186 0.00185 0.00183 0.0018 0.0018 0.00178 0.00176
## Cumulative Proportion 0.80065 0.80249 0.80433 0.8061 0.8079 0.80971 0.81148
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## Standard deviation 5.04817 5.02171 4.98902 4.97151 4.95648 4.93633 4.92196
## Proportion of Variance 0.00176 0.00174 0.00172 0.00171 0.00169 0.00168 0.00167
## Cumulative Proportion 0.81323 0.81497 0.81669 0.81840 0.82009 0.82177 0.82344
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## Standard deviation 4.91153 4.89032 4.88584 4.87962 4.85498 4.82651 4.8155
## Proportion of Variance 0.00166 0.00165 0.00165 0.00164 0.00163 0.00161 0.0016
## Cumulative Proportion 0.82511 0.82676 0.82840 0.83005 0.83167 0.83328 0.8349
## PC103 PC104 PC105 PC106 PC107 PC108 PC109
## Standard deviation 4.80713 4.78770 4.77864 4.75624 4.72984 4.71947 4.70393
## Proportion of Variance 0.00159 0.00158 0.00158 0.00156 0.00154 0.00154 0.00153
## Cumulative Proportion 0.83648 0.83806 0.83963 0.84119 0.84274 0.84427 0.84580
## PC110 PC111 PC112 PC113 PC114 PC115 PC116
## Standard deviation 4.68968 4.68400 4.6694 4.6620 4.65009 4.64362 4.63095
## Proportion of Variance 0.00152 0.00151 0.0015 0.0015 0.00149 0.00149 0.00148
## Cumulative Proportion 0.84732 0.84883 0.8503 0.8518 0.85333 0.85481 0.85629
## PC117 PC118 PC119 PC120 PC121 PC122 PC123
## Standard deviation 4.61497 4.60545 4.60076 4.57736 4.55731 4.54381 4.53646
## Proportion of Variance 0.00147 0.00146 0.00146 0.00145 0.00143 0.00142 0.00142
## Cumulative Proportion 0.85776 0.85923 0.86069 0.86213 0.86356 0.86499 0.86641
## PC124 PC125 PC126 PC127 PC128 PC129 PC130
## Standard deviation 4.52887 4.51799 4.4985 4.49233 4.47698 4.46410 4.44995
## Proportion of Variance 0.00142 0.00141 0.0014 0.00139 0.00138 0.00137 0.00137
## Cumulative Proportion 0.86782 0.86923 0.8706 0.87202 0.87340 0.87478 0.87614
## PC131 PC132 PC133 PC134 PC135 PC136 PC137
## Standard deviation 4.43709 4.42440 4.41385 4.40879 4.39936 4.39410 4.37551
## Proportion of Variance 0.00136 0.00135 0.00134 0.00134 0.00134 0.00133 0.00132
## Cumulative Proportion 0.87750 0.87885 0.88020 0.88154 0.88287 0.88421 0.88553
## PC138 PC139 PC140 PC141 PC142 PC143 PC144
## Standard deviation 4.36735 4.35527 4.3452 4.3347 4.31760 4.31106 4.30843
## Proportion of Variance 0.00132 0.00131 0.0013 0.0013 0.00129 0.00128 0.00128
## Cumulative Proportion 0.88684 0.88815 0.8894 0.8908 0.89204 0.89332 0.89460
## PC145 PC146 PC147 PC148 PC149 PC150 PC151
## Standard deviation 4.30116 4.28136 4.27973 4.27833 4.26897 4.25527 4.24456
## Proportion of Variance 0.00128 0.00126 0.00126 0.00126 0.00126 0.00125 0.00124
## Cumulative Proportion 0.89588 0.89714 0.89840 0.89967 0.90092 0.90217 0.90342
## PC152 PC153 PC154 PC155 PC156 PC157 PC158
## Standard deviation 4.23175 4.22609 4.22287 4.21212 4.19407 4.19254 4.18540
## Proportion of Variance 0.00124 0.00123 0.00123 0.00122 0.00121 0.00121 0.00121
## Cumulative Proportion 0.90465 0.90588 0.90711 0.90834 0.90955 0.91076 0.91197
## PC159 PC160 PC161 PC162 PC163 PC164 PC165
## Standard deviation 4.18148 4.1757 4.15945 4.15326 4.15064 4.13266 4.13060
## Proportion of Variance 0.00121 0.0012 0.00119 0.00119 0.00119 0.00118 0.00118
## Cumulative Proportion 0.91318 0.9144 0.91558 0.91677 0.91796 0.91913 0.92031
## PC166 PC167 PC168 PC169 PC170 PC171 PC172
## Standard deviation 4.12239 4.10979 4.10381 4.08049 4.07807 4.07175 4.06428
## Proportion of Variance 0.00117 0.00117 0.00116 0.00115 0.00115 0.00114 0.00114
## Cumulative Proportion 0.92148 0.92265 0.92381 0.92496 0.92611 0.92725 0.92839
## PC173 PC174 PC175 PC176 PC177 PC178 PC179
## Standard deviation 4.05928 4.04905 4.04227 4.03604 4.03148 4.01621 4.00615
## Proportion of Variance 0.00114 0.00113 0.00113 0.00112 0.00112 0.00111 0.00111
## Cumulative Proportion 0.92953 0.93066 0.93179 0.93291 0.93403 0.93514 0.93625
## PC180 PC181 PC182 PC183 PC184 PC185 PC186
## Standard deviation 4.00344 3.9970 3.9847 3.98190 3.97795 3.97097 3.96063
## Proportion of Variance 0.00111 0.0011 0.0011 0.00109 0.00109 0.00109 0.00108
## Cumulative Proportion 0.93736 0.9385 0.9395 0.94065 0.94174 0.94283 0.94391
## PC187 PC188 PC189 PC190 PC191 PC192 PC193
## Standard deviation 3.94925 3.94394 3.93558 3.92959 3.91346 3.90818 3.89527
## Proportion of Variance 0.00108 0.00107 0.00107 0.00107 0.00106 0.00105 0.00105
## Cumulative Proportion 0.94499 0.94606 0.94713 0.94819 0.94925 0.95030 0.95135
## PC194 PC195 PC196 PC197 PC198 PC199 PC200
## Standard deviation 3.88545 3.88213 3.87032 3.86698 3.86364 3.85295 3.83998
## Proportion of Variance 0.00104 0.00104 0.00103 0.00103 0.00103 0.00102 0.00102
## Cumulative Proportion 0.95239 0.95343 0.95446 0.95550 0.95653 0.95755 0.95857
## PC201 PC202 PC203 PC204 PC205 PC206 PC207
## Standard deviation 3.82618 3.82292 3.81866 3.8102 3.8091 3.79490 3.78678
## Proportion of Variance 0.00101 0.00101 0.00101 0.0010 0.0010 0.00099 0.00099
## Cumulative Proportion 0.95958 0.96059 0.96159 0.9626 0.9636 0.96459 0.96558
## PC208 PC209 PC210 PC211 PC212 PC213 PC214
## Standard deviation 3.77731 3.77639 3.75850 3.75212 3.74980 3.73737 3.73401
## Proportion of Variance 0.00098 0.00098 0.00097 0.00097 0.00097 0.00096 0.00096
## Cumulative Proportion 0.96656 0.96755 0.96852 0.96949 0.97046 0.97143 0.97239
## PC215 PC216 PC217 PC218 PC219 PC220 PC221
## Standard deviation 3.72930 3.71798 3.70684 3.69653 3.68231 3.67427 3.66526
## Proportion of Variance 0.00096 0.00095 0.00095 0.00094 0.00094 0.00093 0.00093
## Cumulative Proportion 0.97335 0.97430 0.97525 0.97619 0.97713 0.97806 0.97899
## PC222 PC223 PC224 PC225 PC226 PC227 PC228
## Standard deviation 3.64503 3.63663 3.63108 3.6128 3.59799 3.59173 3.58447
## Proportion of Variance 0.00092 0.00091 0.00091 0.0009 0.00089 0.00089 0.00089
## Cumulative Proportion 0.97990 0.98081 0.98172 0.9826 0.98352 0.98441 0.98529
## PC229 PC230 PC231 PC232 PC233 PC234 PC235
## Standard deviation 3.58336 3.57029 3.55477 3.54652 3.53573 3.52205 3.50984
## Proportion of Variance 0.00089 0.00088 0.00087 0.00087 0.00086 0.00086 0.00085
## Cumulative Proportion 0.98618 0.98706 0.98793 0.98880 0.98966 0.99052 0.99137
## PC236 PC237 PC238 PC239 PC240 PC241 PC242
## Standard deviation 3.50034 3.48644 3.45560 3.44211 3.42502 3.38215 3.26243
## Proportion of Variance 0.00085 0.00084 0.00082 0.00082 0.00081 0.00079 0.00073
## Cumulative Proportion 0.99221 0.99305 0.99388 0.99469 0.99550 0.99629 0.99703
## PC243 PC244 PC245 PC246 PC247 PC248
## Standard deviation 3.22816 3.14439 3.06517 2.61396 2.56508 1.447e-13
## Proportion of Variance 0.00072 0.00068 0.00065 0.00047 0.00045 0.000e+00
## Cumulative Proportion 0.99774 0.99843 0.99907 0.99955 1.00000 1.000e+00
## PC249 PC250
## Standard deviation 2.336e-14 1.602e-14
## Proportion of Variance 0.000e+00 0.000e+00
## Cumulative Proportion 1.000e+00 1.000e+00
# Colour the GTEx liver PCA (PC1 vs. PC2) by sex and by two age brackets.
# The metadata first loses the sample that was excluded from the vst table.
liver_gtex_metadata_order_v2 <- liver_gtex_metadata_order[
  liver_gtex_metadata_order$external_id != "GTEX-WK11-1326-SM-4OOSI.1",
]

# Sex: samples with SEX == "2" in red (labelled "female" in the legend).
sex <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SEX == "2"
]
sex <- sex[!is.na(sex)]
tumor_norm <- ifelse(
  !(liver_gtex_metadata_order_v2$external_id %in% sex),
  "black",
  "red"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("female", "male/normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# Age bracket 70-79 in red.
age <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$AGE == "70-79"
]
age <- age[!is.na(age)]
tumor_norm <- ifelse(
  !(liver_gtex_metadata_order_v2$external_id %in% age),
  "black",
  "red"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("70-79", "! 70-79"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# Age bracket 20-29 in red.
age <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$AGE == "20-29"
]
age <- age[!is.na(age)]
tumor_norm <- ifelse(
  !(liver_gtex_metadata_order_v2$external_id %in% age),
  "black",
  "red"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
looking at the RIN score
# Colour the GTEx liver PCA by RIN: samples with SMRIN >= 7 in red, the rest
# (including samples with a missing SMRIN) in black.
rin <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SMRIN >= 7
]
rin <- rin[!is.na(rin)]
tumor_norm <- ifelse(
  !(liver_gtex_metadata_order_v2$external_id %in% rin),
  "black",
  "red"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
SMTSISCH indicates the ischemic time in minutes.
# Colour the GTEx liver PCA by ischemic time: samples with SMTSISCH >= 500 in
# red, the rest (including samples with a missing SMTSISCH) in black.
time <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SMTSISCH >= 500
]
time <- time[!is.na(time)]
tumor_norm <- ifelse(
  !(liver_gtex_metadata_order_v2$external_id %in% time),
  "black",
  "red"
)
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "bottomleft",
  legend = c(">= 500", "< 500"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
LIHC tumor samples
# Read the TCGA LIHC count table and its sample metadata from disk.
lihc_tcga_counts <- readRDS(
  "~/data/recount3/recount3_fix_download/lihc_tcga_counts.rds"
)
lihc_tcga_metadata <- readRDS(
  "~/data/recount3/recount3_fix_download/lihc_tcga_metadata.rds"
)
check counts and metadata info
# Print the dimensions of the counts and metadata, and the length of the
# first count column name.
dim(lihc_tcga_counts)
## [1] 63856 424
dim(lihc_tcga_metadata)
## [1] 424 840
nchar(colnames(lihc_tcga_counts)[1])
## [1] 36
# Build the sample IDs from the count column names: substrRight() (helper
# defined elsewhere; presumably keeps the rightmost 36 characters), then every
# punctuation character is replaced by "-".
ids <- str_replace_all(
  substrRight(colnames(lihc_tcga_counts), 36),
  "[[:punct:]]",
  "-"
)
colnames(lihc_tcga_counts) <- ids
# Sort metadata rows by external_id and count columns by ID, then check that
# both are in the same order.
lihc_tcga_metadata_order <- lihc_tcga_metadata[
  order(lihc_tcga_metadata$external_id),
]
lihc_tcga_counts_order <- lihc_tcga_counts[, order(ids)]
identical(
  colnames(lihc_tcga_counts_order),
  lihc_tcga_metadata_order$external_id
)
## [1] TRUE
# Relabel the ordered LIHC count columns with the TCGA barcodes, save the
# ordered counts and metadata, then run vst and a PCA on the samples.
colnames(lihc_tcga_counts_order) <- lihc_tcga_metadata_order$tcga_barcode
saveRDS(
  lihc_tcga_counts_order,
  "/home/rstudio/data/lihc_tcga_count_ordered.rds"
)
saveRDS(
  lihc_tcga_metadata_order,
  "/home/rstudio/data/lihc_tcga_metadata_ordered.rds"
)
counts_liver <- lihc_tcga_counts_order
vst_table <- vst(as.matrix(counts_liver))
# prcomp() is run on the transposed table, i.e. with samples in rows.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)
summary(pca.tumor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 66.2189 60.4903 50.83139 38.81353 37.14832 33.13239
## Proportion of Variance 0.1124 0.0938 0.06624 0.03862 0.03538 0.02814
## Cumulative Proportion 0.1124 0.2062 0.27245 0.31107 0.34645 0.37459
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 30.03564 27.67645 24.72885 23.23548 22.41560 21.03548
## Proportion of Variance 0.02313 0.01964 0.01568 0.01384 0.01288 0.01134
## Cumulative Proportion 0.39771 0.41735 0.43303 0.44687 0.45975 0.47109
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 20.59000 19.49061 18.96662 17.62103 17.25819 16.93320
## Proportion of Variance 0.01087 0.00974 0.00922 0.00796 0.00764 0.00735
## Cumulative Proportion 0.48196 0.49170 0.50092 0.50888 0.51651 0.52386
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 15.93474 15.63198 15.5561 15.1757 14.87444 14.80210
## Proportion of Variance 0.00651 0.00626 0.0062 0.0059 0.00567 0.00562
## Cumulative Proportion 0.53037 0.53664 0.5428 0.5487 0.55442 0.56003
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 14.11845 13.8301 13.75902 13.41666 13.28879 13.0966
## Proportion of Variance 0.00511 0.0049 0.00485 0.00461 0.00453 0.0044
## Cumulative Proportion 0.56514 0.5700 0.57490 0.57951 0.58404 0.5884
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 12.93584 12.79002 12.48329 12.3358 12.20697 11.90233
## Proportion of Variance 0.00429 0.00419 0.00399 0.0039 0.00382 0.00363
## Cumulative Proportion 0.59273 0.59692 0.60092 0.6048 0.60864 0.61227
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 11.81195 11.71324 11.45599 11.37234 11.28823 11.19063
## Proportion of Variance 0.00358 0.00352 0.00336 0.00332 0.00327 0.00321
## Cumulative Proportion 0.61585 0.61936 0.62273 0.62604 0.62931 0.63252
## PC43 PC44 PC45 PC46 PC47 PC48
## Standard deviation 11.14433 11.02312 10.93664 10.79939 10.59888 10.57217
## Proportion of Variance 0.00318 0.00311 0.00307 0.00299 0.00288 0.00287
## Cumulative Proportion 0.63570 0.63882 0.64189 0.64487 0.64775 0.65062
## PC49 PC50 PC51 PC52 PC53 PC54
## Standard deviation 10.50339 10.38534 10.35504 10.19542 10.15638 10.05540
## Proportion of Variance 0.00283 0.00276 0.00275 0.00266 0.00264 0.00259
## Cumulative Proportion 0.65345 0.65621 0.65896 0.66163 0.66427 0.66686
## PC55 PC56 PC57 PC58 PC59 PC60 PC61
## Standard deviation 10.01562 9.91745 9.85720 9.81393 9.72360 9.65246 9.61267
## Proportion of Variance 0.00257 0.00252 0.00249 0.00247 0.00242 0.00239 0.00237
## Cumulative Proportion 0.66943 0.67196 0.67445 0.67692 0.67934 0.68173 0.68410
## PC62 PC63 PC64 PC65 PC66 PC67 PC68
## Standard deviation 9.4759 9.42952 9.37313 9.34285 9.2554 9.17226 9.11433
## Proportion of Variance 0.0023 0.00228 0.00225 0.00224 0.0022 0.00216 0.00213
## Cumulative Proportion 0.6864 0.68868 0.69093 0.69317 0.6954 0.69752 0.69965
## PC69 PC70 PC71 PC72 PC73 PC74 PC75
## Standard deviation 9.0541 8.95341 8.93008 8.86919 8.79246 8.74333 8.71550
## Proportion of Variance 0.0021 0.00206 0.00204 0.00202 0.00198 0.00196 0.00195
## Cumulative Proportion 0.7017 0.70381 0.70585 0.70787 0.70985 0.71181 0.71376
## PC76 PC77 PC78 PC79 PC80 PC81 PC82
## Standard deviation 8.70154 8.68756 8.6032 8.51945 8.50156 8.49045 8.43648
## Proportion of Variance 0.00194 0.00193 0.0019 0.00186 0.00185 0.00185 0.00182
## Cumulative Proportion 0.71570 0.71763 0.7195 0.72139 0.72324 0.72509 0.72692
## PC83 PC84 PC85 PC86 PC87 PC88 PC89
## Standard deviation 8.40262 8.31537 8.27790 8.22888 8.20397 8.16564 8.12088
## Proportion of Variance 0.00181 0.00177 0.00176 0.00174 0.00173 0.00171 0.00169
## Cumulative Proportion 0.72873 0.73050 0.73225 0.73399 0.73572 0.73743 0.73912
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## Standard deviation 8.09440 8.03922 8.02853 7.98425 7.94711 7.91977 7.8962
## Proportion of Variance 0.00168 0.00166 0.00165 0.00163 0.00162 0.00161 0.0016
## Cumulative Proportion 0.74080 0.74245 0.74410 0.74574 0.74736 0.74897 0.7506
## PC97 PC98 PC99 PC100 PC101 PC102 PC103
## Standard deviation 7.86426 7.81706 7.80025 7.77617 7.72169 7.69947 7.66627
## Proportion of Variance 0.00159 0.00157 0.00156 0.00155 0.00153 0.00152 0.00151
## Cumulative Proportion 0.75215 0.75372 0.75528 0.75683 0.75835 0.75987 0.76138
## PC104 PC105 PC106 PC107 PC108 PC109 PC110
## Standard deviation 7.6425 7.58165 7.57433 7.54302 7.52131 7.50193 7.47106
## Proportion of Variance 0.0015 0.00147 0.00147 0.00146 0.00145 0.00144 0.00143
## Cumulative Proportion 0.7629 0.76435 0.76582 0.76728 0.76873 0.77017 0.77161
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## Standard deviation 7.43719 7.42747 7.3982 7.3817 7.33785 7.30415 7.26904
## Proportion of Variance 0.00142 0.00141 0.0014 0.0014 0.00138 0.00137 0.00135
## Cumulative Proportion 0.77302 0.77444 0.7758 0.7772 0.77862 0.77999 0.78134
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## Standard deviation 7.22758 7.20132 7.17759 7.16990 7.15062 7.1320 7.1124
## Proportion of Variance 0.00134 0.00133 0.00132 0.00132 0.00131 0.0013 0.0013
## Cumulative Proportion 0.78268 0.78401 0.78533 0.78665 0.78796 0.7893 0.7906
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## Standard deviation 7.09317 7.05287 7.04517 6.99216 6.98468 6.93722 6.92417
## Proportion of Variance 0.00129 0.00128 0.00127 0.00125 0.00125 0.00123 0.00123
## Cumulative Proportion 0.79185 0.79312 0.79440 0.79565 0.79690 0.79813 0.79936
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## Standard deviation 6.90766 6.88579 6.88300 6.8354 6.82245 6.81843 6.79475
## Proportion of Variance 0.00122 0.00122 0.00121 0.0012 0.00119 0.00119 0.00118
## Cumulative Proportion 0.80059 0.80180 0.80302 0.8042 0.80541 0.80660 0.80778
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## Standard deviation 6.77205 6.73636 6.72509 6.71766 6.70752 6.68737 6.65455
## Proportion of Variance 0.00118 0.00116 0.00116 0.00116 0.00115 0.00115 0.00114
## Cumulative Proportion 0.80896 0.81012 0.81128 0.81244 0.81359 0.81474 0.81587
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## Standard deviation 6.64803 6.63572 6.61354 6.59938 6.5648 6.5552 6.52631
## Proportion of Variance 0.00113 0.00113 0.00112 0.00112 0.0011 0.0011 0.00109
## Cumulative Proportion 0.81701 0.81813 0.81926 0.82037 0.8215 0.8226 0.82367
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## Standard deviation 6.50796 6.49522 6.48419 6.44951 6.44479 6.42496 6.40994
## Proportion of Variance 0.00109 0.00108 0.00108 0.00107 0.00106 0.00106 0.00105
## Cumulative Proportion 0.82476 0.82584 0.82691 0.82798 0.82905 0.83010 0.83116
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## Standard deviation 6.39299 6.37721 6.36509 6.35203 6.33379 6.32519 6.30938
## Proportion of Variance 0.00105 0.00104 0.00104 0.00103 0.00103 0.00103 0.00102
## Cumulative Proportion 0.83221 0.83325 0.83429 0.83532 0.83635 0.83737 0.83840
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## Standard deviation 6.30059 6.28765 6.28444 6.2606 6.2466 6.2346 6.22388
## Proportion of Variance 0.00102 0.00101 0.00101 0.0010 0.0010 0.0010 0.00099
## Cumulative Proportion 0.83941 0.84043 0.84144 0.8424 0.8434 0.8444 0.84543
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## Standard deviation 6.21188 6.19398 6.16575 6.15652 6.14708 6.13491 6.12347
## Proportion of Variance 0.00099 0.00098 0.00097 0.00097 0.00097 0.00096 0.00096
## Cumulative Proportion 0.84642 0.84741 0.84838 0.84935 0.85032 0.85129 0.85225
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## Standard deviation 6.10496 6.08151 6.07288 6.06381 6.03761 6.02356 6.00820
## Proportion of Variance 0.00096 0.00095 0.00095 0.00094 0.00093 0.00093 0.00093
## Cumulative Proportion 0.85320 0.85415 0.85510 0.85604 0.85697 0.85790 0.85883
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## Standard deviation 6.00471 5.98311 5.97582 5.95417 5.94457 5.9358 5.9202
## Proportion of Variance 0.00092 0.00092 0.00092 0.00091 0.00091 0.0009 0.0009
## Cumulative Proportion 0.85975 0.86067 0.86159 0.86250 0.86340 0.8643 0.8652
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## Standard deviation 5.9159 5.90440 5.88657 5.87824 5.86614 5.85466 5.83587
## Proportion of Variance 0.0009 0.00089 0.00089 0.00089 0.00088 0.00088 0.00087
## Cumulative Proportion 0.8661 0.86699 0.86788 0.86877 0.86965 0.87053 0.87140
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## Standard deviation 5.82685 5.80908 5.80554 5.79277 5.78815 5.77404 5.76327
## Proportion of Variance 0.00087 0.00087 0.00086 0.00086 0.00086 0.00085 0.00085
## Cumulative Proportion 0.87227 0.87314 0.87400 0.87486 0.87572 0.87657 0.87743
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## Standard deviation 5.74654 5.73250 5.72419 5.72130 5.70775 5.69691 5.68702
## Proportion of Variance 0.00085 0.00084 0.00084 0.00084 0.00084 0.00083 0.00083
## Cumulative Proportion 0.87827 0.87912 0.87996 0.88079 0.88163 0.88246 0.88329
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## Standard deviation 5.68003 5.66410 5.65935 5.64917 5.62344 5.60512 5.6025
## Proportion of Variance 0.00083 0.00082 0.00082 0.00082 0.00081 0.00081 0.0008
## Cumulative Proportion 0.88412 0.88494 0.88576 0.88658 0.88739 0.88820 0.8890
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## Standard deviation 5.5867 5.5775 5.56768 5.56228 5.54801 5.53775 5.52942
## Proportion of Variance 0.0008 0.0008 0.00079 0.00079 0.00079 0.00079 0.00078
## Cumulative Proportion 0.8898 0.8906 0.89139 0.89219 0.89297 0.89376 0.89454
## PC230 PC231 PC232 PC233 PC234 PC235 PC236
## Standard deviation 5.52205 5.51287 5.50623 5.49011 5.48673 5.47425 5.45998
## Proportion of Variance 0.00078 0.00078 0.00078 0.00077 0.00077 0.00077 0.00076
## Cumulative Proportion 0.89533 0.89611 0.89688 0.89766 0.89843 0.89920 0.89996
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## Standard deviation 5.45317 5.43706 5.42950 5.42328 5.40189 5.37561 5.36945
## Proportion of Variance 0.00076 0.00076 0.00076 0.00075 0.00075 0.00074 0.00074
## Cumulative Proportion 0.90072 0.90148 0.90224 0.90299 0.90374 0.90448 0.90522
## PC244 PC245 PC246 PC247 PC248 PC249 PC250
## Standard deviation 5.36577 5.35541 5.33641 5.32938 5.32848 5.31860 5.31274
## Proportion of Variance 0.00074 0.00074 0.00073 0.00073 0.00073 0.00073 0.00072
## Cumulative Proportion 0.90596 0.90669 0.90742 0.90815 0.90888 0.90960 0.91033
## PC251 PC252 PC253 PC254 PC255 PC256 PC257
## Standard deviation 5.30960 5.30037 5.28031 5.26938 5.25997 5.24633 5.2420
## Proportion of Variance 0.00072 0.00072 0.00071 0.00071 0.00071 0.00071 0.0007
## Cumulative Proportion 0.91105 0.91177 0.91248 0.91319 0.91390 0.91461 0.9153
## PC258 PC259 PC260 PC261 PC262 PC263 PC264
## Standard deviation 5.2302 5.2151 5.20514 5.19917 5.18928 5.18299 5.17808
## Proportion of Variance 0.0007 0.0007 0.00069 0.00069 0.00069 0.00069 0.00069
## Cumulative Proportion 0.9160 0.9167 0.91741 0.91810 0.91879 0.91948 0.92017
## PC265 PC266 PC267 PC268 PC269 PC270 PC271
## Standard deviation 5.16887 5.14921 5.14114 5.13361 5.12148 5.11617 5.10591
## Proportion of Variance 0.00068 0.00068 0.00068 0.00068 0.00067 0.00067 0.00067
## Cumulative Proportion 0.92085 0.92153 0.92221 0.92288 0.92356 0.92423 0.92490
## PC272 PC273 PC274 PC275 PC276 PC277 PC278
## Standard deviation 5.09532 5.08570 5.07254 5.05696 5.04961 5.04434 5.03740
## Proportion of Variance 0.00067 0.00066 0.00066 0.00066 0.00065 0.00065 0.00065
## Cumulative Proportion 0.92556 0.92622 0.92688 0.92754 0.92819 0.92885 0.92950
## PC279 PC280 PC281 PC282 PC283 PC284 PC285
## Standard deviation 5.03214 5.02372 5.02127 5.00657 4.99789 4.99025 4.98303
## Proportion of Variance 0.00065 0.00065 0.00065 0.00064 0.00064 0.00064 0.00064
## Cumulative Proportion 0.93015 0.93079 0.93144 0.93208 0.93272 0.93336 0.93400
## PC286 PC287 PC288 PC289 PC290 PC291 PC292
## Standard deviation 4.97874 4.96314 4.95727 4.94397 4.93021 4.92624 4.91213
## Proportion of Variance 0.00064 0.00063 0.00063 0.00063 0.00062 0.00062 0.00062
## Cumulative Proportion 0.93463 0.93526 0.93589 0.93652 0.93714 0.93777 0.93838
## PC293 PC294 PC295 PC296 PC297 PC298 PC299
## Standard deviation 4.90909 4.90160 4.89691 4.87850 4.87178 4.86361 4.8509
## Proportion of Variance 0.00062 0.00062 0.00061 0.00061 0.00061 0.00061 0.0006
## Cumulative Proportion 0.93900 0.93962 0.94023 0.94084 0.94145 0.94206 0.9427
## PC300 PC301 PC302 PC303 PC304 PC305 PC306
## Standard deviation 4.8386 4.8359 4.8288 4.8185 4.80154 4.79106 4.78740
## Proportion of Variance 0.0006 0.0006 0.0006 0.0006 0.00059 0.00059 0.00059
## Cumulative Proportion 0.9433 0.9439 0.9445 0.9450 0.94564 0.94623 0.94682
## PC307 PC308 PC309 PC310 PC311 PC312 PC313
## Standard deviation 4.77642 4.77353 4.76223 4.75264 4.74697 4.74254 4.73571
## Proportion of Variance 0.00058 0.00058 0.00058 0.00058 0.00058 0.00058 0.00057
## Cumulative Proportion 0.94740 0.94799 0.94857 0.94915 0.94973 0.95030 0.95088
## PC314 PC315 PC316 PC317 PC318 PC319 PC320
## Standard deviation 4.72373 4.71295 4.70918 4.70470 4.68482 4.66986 4.66016
## Proportion of Variance 0.00057 0.00057 0.00057 0.00057 0.00056 0.00056 0.00056
## Cumulative Proportion 0.95145 0.95202 0.95259 0.95316 0.95372 0.95428 0.95483
## PC321 PC322 PC323 PC324 PC325 PC326 PC327
## Standard deviation 4.65292 4.64273 4.63501 4.62715 4.61816 4.61295 4.60138
## Proportion of Variance 0.00055 0.00055 0.00055 0.00055 0.00055 0.00055 0.00054
## Cumulative Proportion 0.95539 0.95594 0.95649 0.95704 0.95759 0.95813 0.95868
## PC328 PC329 PC330 PC331 PC332 PC333 PC334
## Standard deviation 4.59559 4.58464 4.57895 4.56647 4.55779 4.55681 4.54824
## Proportion of Variance 0.00054 0.00054 0.00054 0.00053 0.00053 0.00053 0.00053
## Cumulative Proportion 0.95922 0.95976 0.96029 0.96083 0.96136 0.96189 0.96242
## PC335 PC336 PC337 PC338 PC339 PC340 PC341
## Standard deviation 4.52950 4.52450 4.52059 4.50711 4.49866 4.48588 4.48235
## Proportion of Variance 0.00053 0.00052 0.00052 0.00052 0.00052 0.00052 0.00052
## Cumulative Proportion 0.96295 0.96347 0.96400 0.96452 0.96504 0.96555 0.96607
## PC342 PC343 PC344 PC345 PC346 PC347 PC348
## Standard deviation 4.46636 4.46061 4.45713 4.44465 4.4262 4.4189 4.4026
## Proportion of Variance 0.00051 0.00051 0.00051 0.00051 0.0005 0.0005 0.0005
## Cumulative Proportion 0.96658 0.96709 0.96760 0.96811 0.9686 0.9691 0.9696
## PC349 PC350 PC351 PC352 PC353 PC354 PC355
## Standard deviation 4.3978 4.37642 4.37232 4.35889 4.34876 4.34542 4.32851
## Proportion of Variance 0.0005 0.00049 0.00049 0.00049 0.00048 0.00048 0.00048
## Cumulative Proportion 0.9701 0.97059 0.97108 0.97157 0.97205 0.97254 0.97302
## PC356 PC357 PC358 PC359 PC360 PC361 PC362
## Standard deviation 4.32059 4.30308 4.29827 4.29106 4.28856 4.27829 4.26249
## Proportion of Variance 0.00048 0.00047 0.00047 0.00047 0.00047 0.00047 0.00047
## Cumulative Proportion 0.97350 0.97397 0.97445 0.97492 0.97539 0.97586 0.97632
## PC363 PC364 PC365 PC366 PC367 PC368 PC369
## Standard deviation 4.25341 4.23846 4.23659 4.23118 4.21863 4.20706 4.19351
## Proportion of Variance 0.00046 0.00046 0.00046 0.00046 0.00046 0.00045 0.00045
## Cumulative Proportion 0.97679 0.97725 0.97771 0.97817 0.97862 0.97908 0.97953
## PC370 PC371 PC372 PC373 PC374 PC375 PC376
## Standard deviation 4.18810 4.17791 4.17259 4.15299 4.14799 4.13346 4.11874
## Proportion of Variance 0.00045 0.00045 0.00045 0.00044 0.00044 0.00044 0.00043
## Cumulative Proportion 0.97998 0.98043 0.98087 0.98131 0.98176 0.98219 0.98263
## PC377 PC378 PC379 PC380 PC381 PC382 PC383
## Standard deviation 4.11160 4.10540 4.08205 4.07504 4.06574 4.04579 4.04418
## Proportion of Variance 0.00043 0.00043 0.00043 0.00043 0.00042 0.00042 0.00042
## Cumulative Proportion 0.98306 0.98349 0.98392 0.98435 0.98477 0.98519 0.98561
## PC384 PC385 PC386 PC387 PC388 PC389 PC390
## Standard deviation 4.02632 4.01997 4.00707 4.00413 3.99444 3.9718 3.9617
## Proportion of Variance 0.00042 0.00041 0.00041 0.00041 0.00041 0.0004 0.0004
## Cumulative Proportion 0.98602 0.98644 0.98685 0.98726 0.98767 0.9881 0.9885
## PC391 PC392 PC393 PC394 PC395 PC396 PC397
## Standard deviation 3.9486 3.9340 3.92043 3.91862 3.90287 3.89173 3.87552
## Proportion of Variance 0.0004 0.0004 0.00039 0.00039 0.00039 0.00039 0.00039
## Cumulative Proportion 0.9889 0.9893 0.98967 0.99006 0.99045 0.99084 0.99123
## PC398 PC399 PC400 PC401 PC402 PC403 PC404
## Standard deviation 3.86324 3.85069 3.83834 3.82786 3.80718 3.80150 3.78391
## Proportion of Variance 0.00038 0.00038 0.00038 0.00038 0.00037 0.00037 0.00037
## Cumulative Proportion 0.99161 0.99199 0.99237 0.99274 0.99311 0.99348 0.99385
## PC405 PC406 PC407 PC408 PC409 PC410 PC411
## Standard deviation 3.75824 3.73651 3.73300 3.72056 3.70522 3.69386 3.67066
## Proportion of Variance 0.00036 0.00036 0.00036 0.00035 0.00035 0.00035 0.00035
## Cumulative Proportion 0.99421 0.99457 0.99493 0.99528 0.99563 0.99598 0.99633
## PC412 PC413 PC414 PC415 PC416 PC417 PC418
## Standard deviation 3.63332 3.62893 3.58688 3.55919 3.53632 3.50618 3.46783
## Proportion of Variance 0.00034 0.00034 0.00033 0.00032 0.00032 0.00032 0.00031
## Cumulative Proportion 0.99667 0.99701 0.99734 0.99766 0.99798 0.99830 0.99860
## PC419 PC420 PC421 PC422 PC423 PC424
## Standard deviation 3.45167 3.36417 3.32868 3.18659 3.16115 1.467e-13
## Proportion of Variance 0.00031 0.00029 0.00028 0.00026 0.00026 0.000e+00
## Cumulative Proportion 0.99891 0.99920 0.99948 0.99974 1.00000 1.000e+00
# LIHC PC1 vs PC2, coloured by sample type (metadata field noted as coming
# from recount3_rse_LIHC@colData): "Solid Tissue Normal" samples are black,
# every other sample is red.
lihc_barcodes <- lihc_tcga_metadata_order$tcga_barcode
nt <- lihc_barcodes[
  lihc_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
]
tumor_norm <- ifelse(lihc_barcodes %in% nt, "black", "red")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of LIHC",
  xlab = "PC1 (11.24%)",
  ylab = "PC2 (9.38%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# LIHC PC1 vs PC2, coloured by patient age.
# TCGA records days_to_birth as a negative day count, so values below
# -20000 (older than roughly 55 years) are drawn in black. All other
# samples, including any with a missing value, are drawn in red.
# which() keeps NA comparisons from producing NA barcodes in `nt`.
nt <- lihc_tcga_metadata_order$tcga_barcode[
  which(lihc_tcga_metadata_order$xml_days_to_birth < -20000)
]
tumor_norm <- ifelse(
  lihc_tcga_metadata_order$tcga_barcode %in% nt, "black", "red"
)
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LIHC",
  xlab = "PC1 (11.24%)", ylab = "PC2 (9.38%)",
  cex.axis = 1.5, cex.lab = 1.5
)
# Legend order follows pt.bg: red is the >= -20000 group, black is the
# < -20000 group (the original labels were identical and said -2000).
legend(
  "topleft",
  legend = c(">= -20000 days to birth", "< -20000 days to birth"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)
Lung cancer: import the GTEx lung data
# Load the GTEx lung count matrix and its sample metadata from the
# recount3 download directory.
lung_gtex_counts <- readRDS(
  file = "~/data/recount3/recount3_fix_download/lung_gtex_counts.rds"
)
lung_gtex_metadata <- readRDS(
  file = "~/data/recount3/recount3_fix_download/lung_gtex_metadata.rds"
)
# Compare how sample ids are written in the two objects: the count column
# names use "." where the metadata external_id uses "-".
colnames(lung_gtex_counts)[1:5]
## [1] "GTEX.PLZ5.0726.SM.2I5F9.1" "GTEX.12WSN.0626.SM.5BC61.1"
## [3] "GTEX.11TT1.1626.SM.5EQL7.1" "GTEX.PX3G.0526.SM.2I3EM.1"
## [5] "GTEX.11P7K.0326.SM.59871.1"
lung_gtex_metadata$external_id[1:5]
## [1] "GTEX-111CU-0326-SM-5GZXO.1" "GTEX-111FC-1126-SM-5GZWU.1"
## [3] "GTEX-111VG-0726-SM-5GIDC.1" "GTEX-111YS-0626-SM-5GZXV.1"
## [5] "GTEX-1122O-0126-SM-5GICA.1"
# Put both id sets on a common spelling by turning every punctuation
# character into "-", then confirm the sorted id sets are the same.
ids <- str_replace_all(
  string = colnames(lung_gtex_counts),
  pattern = "[[:punct:]]",
  replacement = "-"
)
meta_data_ids <- str_replace_all(
  string = lung_gtex_metadata$external_id,
  pattern = "[[:punct:]]",
  replacement = "-"
)
identical(
  ids[order(ids)],
  meta_data_ids[order(meta_data_ids)]
)
## [1] TRUE
# Sort count columns and metadata rows by their normalised ids so both
# objects list the samples in the same order.
count_col_order <- order(ids)
metadata_row_order <- order(meta_data_ids)
lung_gtex_counts_order <- lung_gtex_counts[, count_col_order]
lung_gtex_metadata_order <- lung_gtex_metadata[metadata_row_order, ]
# Label the count columns with the metadata external_id values.
colnames(lung_gtex_counts_order) <- lung_gtex_metadata_order$external_id
recount3_count_lung <- as.data.frame(lung_gtex_counts_order)
# Save the ordered counts and metadata.
saveRDS(
  object = recount3_count_lung,
  file = "/home/rstudio/data/lung_gtex_count_ordered.rds"
)
saveRDS(
  object = lung_gtex_metadata_order,
  file = "/home/rstudio/data/lung_gtex_metadata_ordered.rds"
)
# Variance-stabilise the GTEx lung counts, transpose so each row is a
# sample, and run PCA on the result.
counts_lung <- recount3_count_lung
vst_table <- vst(object = as.matrix(counts_lung))
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(x = vst_table_df)
# Print standard deviation and variance explained for the first 10 PCs.
x <- summary(object = pca.tumor)
y <- x[["importance"]]
y[, seq_len(10)]
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 48.81666 36.70425 32.96865 28.25734 23.56789 22.67199
## Proportion of Variance 0.14496 0.08195 0.06612 0.04857 0.03379 0.03127
## Cumulative Proportion 0.14496 0.22690 0.29302 0.34159 0.37537 0.40664
## PC7 PC8 PC9 PC10
## Standard deviation 20.30030 18.74630 17.71035 16.30350
## Proportion of Variance 0.02507 0.02138 0.01908 0.01617
## Cumulative Proportion 0.43171 0.45308 0.47216 0.48833
# GTEx lung PC1 vs PC2 with samples whose SEX is "2" (labelled female in
# the legend) in red and all remaining samples in black.
sex <- lung_gtex_metadata_order$external_id[
  lung_gtex_metadata_order$SEX == "2"
]
sex <- sex[!is.na(sex)]  # discard ids that came back as NA
tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% sex, "red", "black"
)
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("female", "male/normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# GTEx lung PC1 vs PC2 with donors in the "70-79" AGE bracket in red and
# all remaining samples in black.
age <- lung_gtex_metadata_order$external_id[
  lung_gtex_metadata_order$AGE == "70-79"
]
# Drop NA ids. The original subset `sex` here instead of `age`, so the
# plot highlighted female samples rather than the 70-79 group.
age <- age[!is.na(age)]
tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% age, "red", "black"
)
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)", ylab = "PC2 (8.195%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("70-79", "!70-79"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)
# GTEx lung PC1 vs PC2 with donors in the "20-29" AGE bracket in red and
# all remaining samples in black.
in_age_bracket <- lung_gtex_metadata_order$AGE == "20-29"
age <- lung_gtex_metadata_order$external_id[in_age_bracket]
age <- age[!is.na(age)]  # discard ids that came back as NA
tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% age, "red", "black"
)
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# GTEx lung PC1 vs PC2 with samples whose SMRIN is at least 7 in red and
# all remaining samples in black.
rin_at_least_7 <- lung_gtex_metadata_order$SMRIN >= 7
rin <- lung_gtex_metadata_order$external_id[rin_at_least_7]
rin <- rin[!is.na(rin)]  # discard ids that came back as NA
tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% rin, "red", "black"
)
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
SMTSISCH (GTEx total ischemic time of the sample)
# GTEx lung PC1 vs PC2 with samples whose SMTSISCH is at least 500 in red
# and all remaining samples in black.
# Note: `time` also names a base R function; the variable name is kept
# unchanged here.
long_ischemic <- lung_gtex_metadata_order$SMTSISCH >= 500
time <- lung_gtex_metadata_order$external_id[long_ischemic]
time <- time[!is.na(time)]  # discard ids that came back as NA
tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% time, "red", "black"
)
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "bottomleft",
  legend = c(">= 500", "< 500"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
LUAD
# Load the TCGA LUAD sample metadata and count matrix from the recount3
# download directory.
luad_tcga_metadata <- readRDS(
  file = "~/data/recount3/recount3_fix_download/luad_tcga_metadata.rds"
)
luad_tcga_counts <- readRDS(
  file = "~/data/recount3/recount3_fix_download/luad_tcga_counts.rds"
)
Check that the counts and metadata dimensions agree (601 samples in both).
# Dimensions: counts are genes x samples, metadata is samples x fields.
dim(x = luad_tcga_counts)
## [1] 63856 601
dim(x = luad_tcga_metadata)
## [1] 601 840
# Length of the first count column name; it fixes the width passed to
# substrRight() below.
nchar(colnames(luad_tcga_counts)[[1]])
## [1] 36
# Keep the last 36 characters of each column name and turn every
# punctuation character into "-".
ids <- str_replace_all(
  string = substrRight(colnames(luad_tcga_counts), 36),
  pattern = "[[:punct:]]",
  replacement = "-"
)
colnames(luad_tcga_counts) <- ids
# Sort count columns by id and metadata rows by external_id, then confirm
# the two orderings line up.
luad_tcga_counts_order <- luad_tcga_counts[, order(ids)]
luad_metadata_row_order <- order(luad_tcga_metadata$external_id)
luad_tcga_metadata_order <- luad_tcga_metadata[luad_metadata_row_order, ]
identical(
  colnames(luad_tcga_counts_order),
  luad_tcga_metadata_order$external_id
)
## [1] TRUE
# Relabel the ordered count columns with the TCGA barcodes and save the
# ordered counts and metadata.
colnames(luad_tcga_counts_order) <- luad_tcga_metadata_order$tcga_barcode
saveRDS(
  object = luad_tcga_counts_order,
  file = "/home/rstudio/data/luad_tcga_count_ordered.rds"
)
saveRDS(
  object = luad_tcga_metadata_order,
  file = "/home/rstudio/data/luad_tcga_metadata_ordered.rds"
)
# Variance-stabilise the counts, transpose so each row is a sample, and
# run PCA on the result.
vst_table <- vst(object = as.matrix(luad_tcga_counts_order))
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(x = vst_table_df)
# Print standard deviation and variance explained for the first 10 PCs.
x <- summary(object = pca.tumor)
y <- x[["importance"]]
y[, seq_len(10)]
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 57.84925 52.53246 38.85727 34.60496 32.78108 29.92140
## Proportion of Variance 0.11415 0.09414 0.05150 0.04085 0.03666 0.03054
## Cumulative Proportion 0.11415 0.20829 0.25979 0.30064 0.33730 0.36784
## PC7 PC8 PC9 PC10
## Standard deviation 28.22511 26.33350 25.44943 21.65315
## Proportion of Variance 0.02717 0.02365 0.02209 0.01599
## Cumulative Proportion 0.39501 0.41867 0.44076 0.45675
# LUAD PC1 vs PC2, coloured by sample type: "Solid Tissue Normal" samples
# are black, every other sample is red.
luad_barcodes <- luad_tcga_metadata_order$tcga_barcode
nt <- luad_barcodes[
  luad_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
]
tumor_norm <- ifelse(luad_barcodes %in% nt, "black", "red")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)",
  ylab = "PC2 (9.414%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)
# LUAD PC1 vs PC2, coloured by patient sex: samples with
# cgc_case_gender == "FEMALE" are black, all others are red.
sex <- luad_tcga_metadata_order$tcga_barcode[
  which(luad_tcga_metadata_order$cgc_case_gender == "FEMALE")
]
tumor_norm <- ifelse(
  luad_tcga_metadata_order$tcga_barcode %in% sex, "black", "red"
)
# Axis percentages come from the LUAD PCA summary (PC1 0.11415,
# PC2 0.09414); the original labels were copied from the LIHC plot.
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)", ylab = "PC2 (9.414%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("male", "female"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)
# LUAD PC1 vs PC2, coloured by patient age.
# TCGA records days_to_birth as a negative day count, so values below
# -20000 (older than roughly 55 years) are drawn in black. All other
# samples, including any with a missing value, are drawn in red.
# which() keeps NA comparisons from producing NA barcodes in `age`.
age <- luad_tcga_metadata_order$tcga_barcode[
  which(luad_tcga_metadata_order$xml_days_to_birth < -20000)
]
tumor_norm <- ifelse(
  luad_tcga_metadata_order$tcga_barcode %in% age, "black", "red"
)
# Axis percentages come from the LUAD PCA summary (PC1 0.11415,
# PC2 0.09414); the original labels were copied from the LIHC plot.
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)", ylab = "PC2 (9.414%)",
  cex.axis = 1.5, cex.lab = 1.5
)
# Legend order follows pt.bg: red is the >= -20000 group, black is the
# < -20000 group (the original labels were identical and said -2000).
legend(
  "topleft",
  legend = c(">= -20000 days to birth", "< -20000 days to birth"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)
# Pull the metadata rows for samples whose PC2 score is above 50, to look
# for a covariate that explains the PC2 separation.
# Note: `list` also names a base R function; the variable name is kept
# unchanged here.
pc2_scores <- pca.tumor$x[, 2]
list <- names(pc2_scores)[pc2_scores > 50]
metadata <- as.data.frame(luad_tcga_metadata_order)
has_high_pc2 <- luad_tcga_metadata_order$tcga_barcode %in% list
metadata_test <- metadata[has_high_pc2, ]
I don't see a reason for the PC2 separation in the metadata.
### Save Data
### Save Figures
Location of final scripts:
/scripts
Location of data produced:
na
Dates when operations were done:
220524
# Print the R version, platform, locale and attached/loaded package
# versions for this session.
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] DESeq2_1.34.0 stringr_1.4.1
## [3] recount3_1.4.0 SummarizedExperiment_1.24.0
## [5] Biobase_2.54.0 GenomicRanges_1.46.1
## [7] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [9] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [11] MatrixGenerics_1.6.0 matrixStats_0.62.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 bit64_4.0.5 filelock_1.0.2
## [4] RColorBrewer_1.1-3 httr_1.4.4 tools_4.1.3
## [7] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
## [10] colorspace_2.0-3 DBI_1.1.3 tidyselect_1.1.2
## [13] bit_4.0.4 curl_4.3.2 compiler_4.1.3
## [16] cli_3.4.1 DelayedArray_0.20.0 rtracklayer_1.54.0
## [19] sass_0.4.2 scales_1.2.1 genefilter_1.76.0
## [22] rappdirs_0.3.3 digest_0.6.29 Rsamtools_2.10.0
## [25] rmarkdown_2.16 R.utils_2.12.0 XVector_0.34.0
## [28] pkgconfig_2.0.3 htmltools_0.5.3 sessioninfo_1.2.2
## [31] highr_0.9 dbplyr_2.2.1 fastmap_1.1.0
## [34] rlang_1.0.6 rstudioapi_0.13 RSQLite_2.2.17
## [37] jquerylib_0.1.4 BiocIO_1.4.0 generics_0.1.3
## [40] jsonlite_1.8.0 BiocParallel_1.28.3 dplyr_1.0.10
## [43] R.oo_1.25.0 RCurl_1.98-1.8 magrittr_2.0.3
## [46] GenomeInfoDbData_1.2.7 Matrix_1.5-1 Rcpp_1.0.9
## [49] munsell_0.5.0 fansi_1.0.3 lifecycle_1.0.2
## [52] R.methodsS3_1.8.2 stringi_1.7.8 yaml_2.3.5
## [55] zlibbioc_1.40.0 BiocFileCache_2.2.1 grid_4.1.3
## [58] blob_1.2.3 parallel_4.1.3 crayon_1.5.2
## [61] lattice_0.20-45 Biostrings_2.62.0 splines_4.1.3
## [64] annotate_1.72.0 KEGGREST_1.34.0 locfit_1.5-9.6
## [67] knitr_1.40 pillar_1.8.1 rjson_0.2.21
## [70] geneplotter_1.72.0 XML_3.99-0.10 glue_1.6.2
## [73] evaluate_0.16 data.table_1.14.2 vctrs_0.4.2
## [76] png_0.1-7 gtable_0.3.1 purrr_0.3.4
## [79] assertthat_0.2.1 cachem_1.0.6 ggplot2_3.3.6
## [82] xfun_0.33 xtable_1.8-4 restfulr_0.0.15
## [85] survival_3.3-1 tibble_3.1.8 GenomicAlignments_1.30.0
## [88] AnnotationDbi_1.56.2 memoise_2.0.1